HADOOP-14216. Improve Configuration XML Parsing Performance (jeagles)

This commit is contained in:
Jonathan Eagles 2017-03-29 10:12:02 -05:00
parent 84d787b9d5
commit 523f467d93
4 changed files with 263 additions and 130 deletions

View File

@ -313,6 +313,16 @@
<groupId>com.fasterxml.jackson.core</groupId> <groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId> <artifactId>jackson-databind</artifactId>
</dependency> </dependency>
<dependency>
<groupId>org.codehaus.woodstox</groupId>
<artifactId>stax2-api</artifactId>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>com.fasterxml</groupId>
<artifactId>aalto-xml</artifactId>
<scope>compile</scope>
</dependency>
</dependencies> </dependencies>
<build> <build>

View File

@ -18,6 +18,7 @@
package org.apache.hadoop.conf; package org.apache.hadoop.conf;
import com.fasterxml.aalto.stax.InputFactoryImpl;
import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.core.JsonGenerator;
import com.google.common.annotations.VisibleForTesting; import com.google.common.annotations.VisibleForTesting;
@ -65,9 +66,11 @@ import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference; import java.util.concurrent.atomic.AtomicReference;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException; import javax.xml.parsers.ParserConfigurationException;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import javax.xml.transform.Transformer; import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException; import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory; import javax.xml.transform.TransformerFactory;
@ -93,14 +96,10 @@ import org.apache.hadoop.security.alias.CredentialProviderFactory;
import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.StringInterner; import org.apache.hadoop.util.StringInterner;
import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.StringUtils;
import org.w3c.dom.Attr; import org.codehaus.stax2.XMLInputFactory2;
import org.w3c.dom.DOMException; import org.codehaus.stax2.XMLStreamReader2;
import org.w3c.dom.Document; import org.w3c.dom.Document;
import org.w3c.dom.Element; import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
import org.xml.sax.SAXException;
import com.google.common.base.Preconditions; import com.google.common.base.Preconditions;
import com.google.common.base.Strings; import com.google.common.base.Strings;
@ -281,6 +280,12 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
*/ */
private Map<String, String[]> updatingResource; private Map<String, String[]> updatingResource;
/**
 * Shared StAX input factory used to create the XML stream readers that
 * parse configuration resources.
 *
 * The concrete implementation is instantiated directly so that no time is
 * spent locating a suitable factory at runtime. Once initialized, the
 * factory is reused across threads without synchronization.
 * NOTE(review): the thread-safety statement relies on the contract of the
 * chosen InputFactoryImpl - confirm against that library's documentation.
 */
private static final XMLInputFactory2 factory = new InputFactoryImpl();
/** /**
* Class to keep the information about the keys which replace the deprecated * Class to keep the information about the keys which replace the deprecated
* ones. * ones.
@ -2613,8 +2618,8 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
return configMap; return configMap;
} }
private Document parse(DocumentBuilder builder, URL url) private XMLStreamReader parse(URL url)
throws IOException, SAXException { throws IOException, XMLStreamException {
if (!quietmode) { if (!quietmode) {
if (LOG.isDebugEnabled()) { if (LOG.isDebugEnabled()) {
LOG.debug("parsing URL " + url); LOG.debug("parsing URL " + url);
@ -2630,23 +2635,18 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
// with other users. // with other users.
connection.setUseCaches(false); connection.setUseCaches(false);
} }
return parse(builder, connection.getInputStream(), url.toString()); return parse(connection.getInputStream(), url.toString());
} }
private Document parse(DocumentBuilder builder, InputStream is, private XMLStreamReader parse(InputStream is,
String systemId) throws IOException, SAXException { String systemId) throws IOException, XMLStreamException {
if (!quietmode) { if (!quietmode) {
LOG.debug("parsing input stream " + is); LOG.debug("parsing input stream " + is);
} }
if (is == null) { if (is == null) {
return null; return null;
} }
try { return factory.createXMLStreamReader(systemId, is);
return (systemId == null) ? builder.parse(is) : builder.parse(is,
systemId);
} finally {
is.close();
}
} }
private void loadResources(Properties properties, private void loadResources(Properties properties,
@ -2666,37 +2666,20 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
} }
} }
private Resource loadResource(Properties properties, Resource wrapper, boolean quiet) { private Resource loadResource(Properties properties,
Resource wrapper, boolean quiet) {
String name = UNKNOWN_RESOURCE; String name = UNKNOWN_RESOURCE;
try { try {
Object resource = wrapper.getResource(); Object resource = wrapper.getResource();
name = wrapper.getName(); name = wrapper.getName();
XMLStreamReader2 reader = null;
DocumentBuilderFactory docBuilderFactory
= DocumentBuilderFactory.newInstance();
//ignore all comments inside the xml file
docBuilderFactory.setIgnoringComments(true);
//allow includes in the xml file
docBuilderFactory.setNamespaceAware(true);
try {
docBuilderFactory.setXIncludeAware(true);
} catch (UnsupportedOperationException e) {
LOG.error("Failed to set setXIncludeAware(true) for parser "
+ docBuilderFactory
+ ":" + e,
e);
}
DocumentBuilder builder = docBuilderFactory.newDocumentBuilder();
Document doc = null;
Element root = null;
boolean returnCachedProperties = false; boolean returnCachedProperties = false;
if (resource instanceof URL) { // an URL resource if (resource instanceof URL) { // an URL resource
doc = parse(builder, (URL)resource); reader = (XMLStreamReader2)parse((URL)resource);
} else if (resource instanceof String) { // a CLASSPATH resource } else if (resource instanceof String) { // a CLASSPATH resource
URL url = getResource((String)resource); URL url = getResource((String)resource);
doc = parse(builder, url); reader = (XMLStreamReader2)parse(url);
} else if (resource instanceof Path) { // a file resource } else if (resource instanceof Path) { // a file resource
// Can't use FileSystem API or we get an infinite loop // Can't use FileSystem API or we get an infinite loop
// since FileSystem uses Configuration API. Use java.io.File instead. // since FileSystem uses Configuration API. Use java.io.File instead.
@ -2706,103 +2689,187 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
if (!quiet) { if (!quiet) {
LOG.debug("parsing File " + file); LOG.debug("parsing File " + file);
} }
doc = parse(builder, new BufferedInputStream( reader = (XMLStreamReader2)parse(new BufferedInputStream(
new FileInputStream(file)), ((Path)resource).toString()); new FileInputStream(file)), ((Path)resource).toString());
} }
} else if (resource instanceof InputStream) { } else if (resource instanceof InputStream) {
doc = parse(builder, (InputStream) resource, null); reader = (XMLStreamReader2)parse((InputStream)resource, null);
returnCachedProperties = true; returnCachedProperties = true;
} else if (resource instanceof Properties) { } else if (resource instanceof Properties) {
overlay(properties, (Properties)resource); overlay(properties, (Properties)resource);
} else if (resource instanceof Element) {
root = (Element)resource;
} }
if (root == null) { if (reader == null) {
if (doc == null) {
if (quiet) { if (quiet) {
return null; return null;
} }
throw new RuntimeException(resource + " not found"); throw new RuntimeException(resource + " not found");
} }
root = doc.getDocumentElement();
}
Properties toAddTo = properties; Properties toAddTo = properties;
if(returnCachedProperties) { if(returnCachedProperties) {
toAddTo = new Properties(); toAddTo = new Properties();
} }
if (!"configuration".equals(root.getTagName()))
LOG.fatal("bad conf file: top-level element not <configuration>");
NodeList props = root.getChildNodes();
DeprecationContext deprecations = deprecationContext.get(); DeprecationContext deprecations = deprecationContext.get();
for (int i = 0; i < props.getLength(); i++) {
Node propNode = props.item(i); StringBuilder token = new StringBuilder();
if (!(propNode instanceof Element)) String confName = null;
continue; String confValue = null;
Element prop = (Element)propNode; boolean confFinal = false;
if ("configuration".equals(prop.getTagName())) { boolean fallbackAllowed = false;
loadResource(toAddTo, new Resource(prop, name), quiet); boolean fallbackEntered = false;
continue; boolean parseToken = false;
LinkedList<String> confSource = new LinkedList<String>();
while (reader.hasNext()) {
switch (reader.next()) {
case XMLStreamConstants.START_ELEMENT:
switch (reader.getLocalName()) {
case "property":
confName = null;
confValue = null;
confFinal = false;
confSource.clear();
// First test for short format configuration
int attrCount = reader.getAttributeCount();
for (int i = 0; i < attrCount; i++) {
String propertyAttr = reader.getAttributeLocalName(i);
if ("name".equals(propertyAttr)) {
confName = StringInterner.weakIntern(
reader.getAttributeValue(i));
} else if ("value".equals(propertyAttr)) {
confValue = StringInterner.weakIntern(
reader.getAttributeValue(i));
} else if ("final".equals(propertyAttr)) {
confFinal = "true".equals(reader.getAttributeValue(i));
} else if ("source".equals(propertyAttr)) {
confSource.add(StringInterner.weakIntern(
reader.getAttributeValue(i)));
} }
if (!"property".equals(prop.getTagName()))
LOG.warn("bad conf file: element not <property>");
String attr = null;
String value = null;
boolean finalParameter = false;
LinkedList<String> source = new LinkedList<String>();
Attr propAttr = prop.getAttributeNode("name");
if (propAttr != null)
attr = StringInterner.weakIntern(propAttr.getValue());
propAttr = prop.getAttributeNode("value");
if (propAttr != null)
value = StringInterner.weakIntern(propAttr.getValue());
propAttr = prop.getAttributeNode("final");
if (propAttr != null)
finalParameter = "true".equals(propAttr.getValue());
propAttr = prop.getAttributeNode("source");
if (propAttr != null)
source.add(StringInterner.weakIntern(propAttr.getValue()));
NodeList fields = prop.getChildNodes();
for (int j = 0; j < fields.getLength(); j++) {
Node fieldNode = fields.item(j);
if (!(fieldNode instanceof Element))
continue;
Element field = (Element)fieldNode;
if ("name".equals(field.getTagName()) && field.hasChildNodes())
attr = StringInterner.weakIntern(
((Text)field.getFirstChild()).getData().trim());
if ("value".equals(field.getTagName()) && field.hasChildNodes())
value = StringInterner.weakIntern(
((Text)field.getFirstChild()).getData());
if ("final".equals(field.getTagName()) && field.hasChildNodes())
finalParameter = "true".equals(((Text)field.getFirstChild()).getData());
if ("source".equals(field.getTagName()) && field.hasChildNodes())
source.add(StringInterner.weakIntern(
((Text)field.getFirstChild()).getData()));
} }
source.add(name); break;
case "name":
case "value":
case "final":
case "source":
parseToken = true;
token.setLength(0);
break;
case "include":
if (!"xi".equals(reader.getPrefix())) {
break;
}
// Determine href for xi:include
String confInclude = null;
attrCount = reader.getAttributeCount();
for (int i = 0; i < attrCount; i++) {
String attrName = reader.getAttributeLocalName(i);
if ("href".equals(attrName)) {
confInclude = reader.getAttributeValue(i);
}
}
if (confInclude == null) {
break;
}
// Determine if the included resource is a classpath resource
// otherwise fallback to a file resource
// xi:include are treated as inline and retain current source
URL include = getResource(confInclude);
if (include != null) {
Resource classpathResource = new Resource(include, name);
loadResource(properties, classpathResource, quiet);
} else {
File href = new File(confInclude);
if (!href.isAbsolute()) {
// Included resources are relative to the current resource
File baseFile = new File(name).getParentFile();
href = new File(baseFile, href.getPath());
}
if (!href.exists()) {
// Resource errors are non-fatal iff there is 1 xi:fallback
fallbackAllowed = true;
break;
}
Resource uriResource = new Resource(href.toURI().toURL(), name);
loadResource(properties, uriResource, quiet);
}
break;
case "fallback":
if (!"xi".equals(reader.getPrefix())) {
break;
}
fallbackEntered = true;
break;
case "configuration":
break;
default:
break;
}
break;
// Ignore this parameter if it has already been marked as 'final' case XMLStreamConstants.CHARACTERS:
if (attr != null) { if (parseToken) {
if (deprecations.getDeprecatedKeyMap().containsKey(attr)) { char[] text = reader.getTextCharacters();
token.append(text, reader.getTextStart(), reader.getTextLength());
}
break;
case XMLStreamConstants.END_ELEMENT:
switch (reader.getLocalName()) {
case "name":
if (token.length() > 0) {
confName = StringInterner.weakIntern(token.toString().trim());
}
break;
case "value":
if (token.length() > 0) {
confValue = StringInterner.weakIntern(token.toString());
}
break;
case "final":
confFinal = "true".equals(token.toString());
break;
case "source":
confSource.add(StringInterner.weakIntern(token.toString()));
break;
case "include":
if (!"xi".equals(reader.getPrefix())) {
break;
}
if (fallbackAllowed && !fallbackEntered) {
throw new IOException("Fetch fail on include with no "
+ "fallback while loading '" + name + "'");
}
fallbackAllowed = false;
fallbackEntered = false;
break;
case "property":
if (confName == null || (!fallbackAllowed && fallbackEntered)) {
break;
}
confSource.add(name);
DeprecatedKeyInfo keyInfo = DeprecatedKeyInfo keyInfo =
deprecations.getDeprecatedKeyMap().get(attr); deprecations.getDeprecatedKeyMap().get(confName);
if (keyInfo != null) {
keyInfo.clearAccessed(); keyInfo.clearAccessed();
for (String key : keyInfo.newKeys) { for (String key : keyInfo.newKeys) {
// update new keys with deprecated key's value // update new keys with deprecated key's value
loadProperty(toAddTo, name, key, value, finalParameter, loadProperty(toAddTo, name, key, confValue, confFinal,
source.toArray(new String[source.size()])); confSource.toArray(new String[confSource.size()]));
} }
} } else {
else { loadProperty(toAddTo, name, confName, confValue, confFinal,
loadProperty(toAddTo, name, attr, value, finalParameter, confSource.toArray(new String[confSource.size()]));
source.toArray(new String[source.size()])); }
} break;
default:
break;
}
default:
break;
} }
} }
reader.close();
if (returnCachedProperties) { if (returnCachedProperties) {
overlay(properties, toAddTo); overlay(properties, toAddTo);
@ -2812,13 +2879,7 @@ public class Configuration implements Iterable<Map.Entry<String,String>>,
} catch (IOException e) { } catch (IOException e) {
LOG.fatal("error parsing conf " + name, e); LOG.fatal("error parsing conf " + name, e);
throw new RuntimeException(e); throw new RuntimeException(e);
} catch (DOMException e) { } catch (XMLStreamException e) {
LOG.fatal("error parsing conf " + name, e);
throw new RuntimeException(e);
} catch (SAXException e) {
LOG.fatal("error parsing conf " + name, e);
throw new RuntimeException(e);
} catch (ParserConfigurationException e) {
LOG.fatal("error parsing conf " + name, e); LOG.fatal("error parsing conf " + name, e);
throw new RuntimeException(e); throw new RuntimeException(e);
} }

View File

@ -99,8 +99,20 @@ public class TestConfiguration extends TestCase {
out.close(); out.close();
} }
private void addInclude(String filename) throws IOException{ private void startInclude(String filename) throws IOException {
out.write("<xi:include href=\"" + filename + "\" xmlns:xi=\"http://www.w3.org/2001/XInclude\" />\n "); out.write("<xi:include href=\"" + filename + "\" xmlns:xi=\"http://www.w3.org/2001/XInclude\" >\n ");
}
/**
 * Writes the closing {@code </xi:include>} tag to the current {@code out}
 * writer.
 *
 * @throws IOException if writing to {@code out} fails
 */
private void endInclude() throws IOException {
  final String closingTag = "</xi:include>\n ";
  out.write(closingTag);
}
/**
 * Writes the opening {@code <xi:fallback>} tag to the current {@code out}
 * writer.
 *
 * @throws IOException if writing to {@code out} fails
 */
private void startFallback() throws IOException {
  final String openingTag = "<xi:fallback>\n ";
  out.write(openingTag);
}
private void endFallback() throws IOException {
out.write("</xi:fallback>\n ");
} }
public void testInputStreamResource() throws Exception { public void testInputStreamResource() throws Exception {
@ -507,7 +519,8 @@ public class TestConfiguration extends TestCase {
out=new BufferedWriter(new FileWriter(CONFIG)); out=new BufferedWriter(new FileWriter(CONFIG));
startConfig(); startConfig();
addInclude(CONFIG2); startInclude(CONFIG2);
endInclude();
appendProperty("e","f"); appendProperty("e","f");
appendProperty("g","h"); appendProperty("g","h");
endConfig(); endConfig();
@ -522,6 +535,44 @@ public class TestConfiguration extends TestCase {
tearDown(); tearDown();
} }
/**
 * Exercises xi:include elements that carry an xi:fallback child.
 *
 * CONFIG2 is written with properties a and c. CONFIG then includes CONFIG2
 * (with a fallback redefining a and c), declares e and g directly, and
 * includes "MissingConfig.xml" (presumably a file that does not exist) with
 * a fallback defining i and k. The assertions expect the included values
 * for a and c, the direct values for e and g, and the fallback values for
 * i and k.
 */
public void testIncludesWithFallback() throws Exception {
  tearDown();

  // Resource that the first include points at.
  out = new BufferedWriter(new FileWriter(CONFIG2));
  startConfig();
  appendProperty("a", "b");
  appendProperty("c", "d");
  endConfig();

  // Main resource: two includes, each carrying its own fallback section.
  out = new BufferedWriter(new FileWriter(CONFIG));
  startConfig();

  startInclude(CONFIG2);
  startFallback();
  appendProperty("a", "b.fallback");
  appendProperty("c", "d.fallback", true);
  endFallback();
  endInclude();

  appendProperty("e", "f");
  appendProperty("g", "h");

  startInclude("MissingConfig.xml");
  startFallback();
  appendProperty("i", "j.fallback");
  appendProperty("k", "l.fallback", true);
  endFallback();
  endInclude();

  endConfig();

  // verify that the includes file contains all properties
  conf.addResource(new Path(CONFIG));

  // Each row is {key, expected value}; rows are checked in declaration order.
  final String[][] expectations = {
      {"a", "b"},
      {"c", "d"},
      {"e", "f"},
      {"g", "h"},
      {"i", "j.fallback"},
      {"k", "l.fallback"},
  };
  for (String[] expectation : expectations) {
    assertEquals(expectation[1], conf.get(expectation[0]));
  }

  tearDown();
}
public void testRelativeIncludes() throws Exception { public void testRelativeIncludes() throws Exception {
tearDown(); tearDown();
String relConfig = new File("./tmp/test-config.xml").getAbsolutePath(); String relConfig = new File("./tmp/test-config.xml").getAbsolutePath();
@ -536,7 +587,8 @@ public class TestConfiguration extends TestCase {
out = new BufferedWriter(new FileWriter(relConfig)); out = new BufferedWriter(new FileWriter(relConfig));
startConfig(); startConfig();
// Add the relative path instead of the absolute one. // Add the relative path instead of the absolute one.
addInclude(new File(relConfig2).getName()); startInclude(new File(relConfig2).getName());
endInclude();
appendProperty("c", "d"); appendProperty("c", "d");
endConfig(); endConfig();

View File

@ -869,6 +869,16 @@
<artifactId>core</artifactId> <artifactId>core</artifactId>
<version>3.1.1</version> <version>3.1.1</version>
</dependency> </dependency>
<dependency>
<groupId>org.codehaus.woodstox</groupId>
<artifactId>stax2-api</artifactId>
<version>3.1.4</version>
</dependency>
<dependency>
<groupId>com.fasterxml</groupId>
<artifactId>aalto-xml</artifactId>
<version>1.0.0</version>
</dependency>
<dependency> <dependency>
<groupId>org.codehaus.jackson</groupId> <groupId>org.codehaus.jackson</groupId>
<artifactId>jackson-mapper-asl</artifactId> <artifactId>jackson-mapper-asl</artifactId>