HADOOP-7947. Validate XMLs if a relevant tool is available, when using scripts (Kengo Seki via aw)

Allen Wittenauer 2015-05-28 11:01:00 -07:00
@ -48,6 +48,9 @@ Trunk (Unreleased)
HADOOP-9642. Configuration to resolve environment variables via
${env.VARIABLE} references (Kengo Seki via aw)
HADOOP-7947. Validate XMLs if a relevant tool is available, when using
scripts (Kengo Seki via aw)
HADOOP-8017. Configure hadoop-main pom to get rid of M2E plugin execution

@ -27,6 +27,7 @@ function hadoop_usage()
echo " libraries availability"
echo " classpath prints the class path needed to get the"
echo " Hadoop jar and the required libraries"
echo " conftest validate configuration XML files"
echo " credential interact with credential providers"
echo " daemonlog get/set the log level for each daemon"
echo " distch path:owner:group:permisson"
@ -127,6 +128,9 @@ case ${COMMAND} in
hadoop_do_classpath_subcommand CLASS "$@"

@ -0,0 +1,368 @@
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* See the License for the specific language governing permissions and
* limitations under the License.
package org.apache.hadoop.util;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Stack;
import java.util.Map.Entry;
import javax.xml.namespace.QName;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.Attribute;
import javax.xml.stream.events.Characters;
import javax.xml.stream.events.StartElement;
import javax.xml.stream.events.XMLEvent;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.MissingArgumentException;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.hadoop.classification.InterfaceAudience;
* This class validates configuration XML files in ${HADOOP_CONF_DIR} or
* specified ones.
// Command-line tool class: main() parses the options, collects the XML files
// to check and prints the messages produced by checkConf() for each of them.
public final class ConfTest {
// Help text handed to terminate() both for -h/--help (status 0) and when
// option parsing fails with a ParseException (status 1).
private static final String USAGE =
"Usage: hadoop conftest [-conffile <path>|-h|--help]\n"
+ " Options:\n"
+ " \n"
+ " -conffile <path>\n"
+ " If not specified, the files in ${HADOOP_CONF_DIR}\n"
+ " whose name end with .xml will be verified.\n"
+ " If specified, that path will be verified.\n"
+ " You can specify either a file or directory, and\n"
+ " if a directory specified, the files in that directory\n"
+ " whose name end with .xml will be verified.\n"
+ " You can specify this option multiple times.\n"
+ " -h, --help Print this help";
// Name of the environment variable main() reads (via System.getenv) to find
// the configuration directory when no -conffile option is supplied.
private static final String HADOOP_CONF_DIR = "HADOOP_CONF_DIR";
// NOTE(review): the constructor body is not visible in this view. A protected
// constructor on a final class cannot be reached by subclasses — confirm
// whether private was intended.
protected ConfTest() {
/**
 * Walks the XML document with a StAX event reader, creating a
 * {@link NodeInfo} for every start element and using the {@code parsed}
 * stack to know which elements are currently open.
 *
 * NOTE(review): several statements of this method (the attribute-loop body,
 * the push onto {@code parsed}, the addition to {@code nodes}) are not
 * visible in this view; the comments below describe only what is shown.
 *
 * NOTE(review): the XMLInputFactory is used with its default settings; if
 * input files can come from an untrusted source, consider turning off DTD /
 * external-entity support — confirm the threat model.
 *
 * @param in stream holding the XML document
 * @return the {@code nodes} list, or {@code null} when the first start
 *         element is not {@code <configuration>}
 * @throws XMLStreamException propagated from the StAX factory/reader calls
 */
private static List<NodeInfo> parseConf(InputStream in)
throws XMLStreamException {
QName configuration = new QName("configuration");
QName property = new QName("property");
List<NodeInfo> nodes = new ArrayList<NodeInfo>();
Stack<NodeInfo> parsed = new Stack<NodeInfo>();
XMLInputFactory factory = XMLInputFactory.newInstance();
XMLEventReader reader = factory.createXMLEventReader(in);
while (reader.hasNext()) {
XMLEvent event = reader.nextEvent();
if (event.isStartElement()) {
StartElement currentElement = event.asStartElement();
NodeInfo currentNode = new NodeInfo(currentElement);
// Nothing is open yet, so this start element is the document root.
if (parsed.isEmpty()) {
if (!currentElement.getName().equals(configuration)) {
// null tells checkConf() that the root element has the wrong name.
return null;
} else {
NodeInfo parentNode = parsed.peek();
QName parentName = parentNode.getStartElement().getName();
// A <property> whose parent is <configuration>: iterate its attributes.
if (parentName.equals(configuration)
&& currentNode.getStartElement().getName().equals(property)) {
Iterator<Attribute> it = currentElement.getAttributes();
// NOTE(review): loop body not visible here; presumably each attribute is
// recorded on currentNode — confirm.
while (it.hasNext()) {
// An element whose parent is <property>; its handling is not visible here.
} else if (parentName.equals(property)) {
} else if (event.isEndElement()) {
// Closing tag: remove the innermost open element from the stack.
NodeInfo node = parsed.pop();
// Only one element remains open, so the popped node sat directly beneath
// the bottom-most element. NOTE(review): the action taken is not visible.
if (parsed.size() == 1) {
} else if (event.isCharacters()) {
// Text found with at least three elements open (deeper than a direct child
// of the root).
if (2 < parsed.size()) {
// Pop the innermost element to reach the one below it, then store the text
// against that inner element only if no text is stored for it yet.
// NOTE(review): the matching re-push of parentNode is not visible here.
NodeInfo parentNode = parsed.pop();
StartElement parentElement = parentNode.getStartElement();
NodeInfo grandparentNode = parsed.peek();
if (grandparentNode.getElement(parentElement) == null) {
grandparentNode.setElement(parentElement, event.asCharacters());
return nodes;
/**
 * Validates one configuration document and returns the problems found as
 * human-readable messages.
 *
 * The stream is first parsed with parseConf(); a wrong root element or an
 * XMLStreamException yields a single "bad conf file: ..." message and the
 * method returns early. Otherwise every parsed node is checked for: not
 * being a property element, missing or empty name, missing value, and
 * repeated child/attribute names (except "source"). Finally, property names
 * that occur on more than one line are reported together with those lines.
 *
 * NOTE(review): some statements of this method are not visible in this view
 * (for example the tail of one String.format call and the statement that
 * records a line number in the duplicate map).
 *
 * @param in stream holding the XML document; it is not closed by the code
 *           visible here
 * @return the list of error messages, empty when nothing was reported
 */
public static List<String> checkConf(InputStream in) {
List<NodeInfo> nodes = null;
List<String> errors = new ArrayList<String>();
try {
nodes = parseConf(in);
// parseConf() returns null when the root element is not <configuration>.
if (nodes == null) {
errors.add("bad conf file: top-level element not <configuration>");
} catch (XMLStreamException e) {
errors.add("bad conf file: " + e.getMessage());
// A structural failure makes the per-property checks pointless: stop here.
if (!errors.isEmpty()) {
return errors;
// Property name -> line numbers of the <property> elements using that name.
Map<String, List<Integer>> duplicatedProperties =
new HashMap<String, List<Integer>>();
for (NodeInfo node : nodes) {
StartElement element = node.getStartElement();
// Line reported in every message about this node.
int line = element.getLocation().getLineNumber();
if (!element.getName().equals(new QName("property"))) {
errors.add(String.format("Line %d: element not <property>", line));
// All events registered under "name": may be an attribute or a child element.
List<XMLEvent> events = node.getXMLEventsForQName(new QName("name"));
if (events == null) {
errors.add(String.format("Line %d: <property> has no <name>", line));
} else {
// v is overwritten on each iteration, so the last usable name wins.
String v = null;
for (XMLEvent event : events) {
if (event.isAttribute()) {
v = ((Attribute) event).getValue();
} else {
// For a child element the name is the text stored for that element.
Characters c = node.getElement(event.asStartElement());
if (c != null) {
v = c.getData();
if (v == null || v.isEmpty()) {
errors.add(String.format("Line %d: <property> has an empty <name>",
if (v != null && !v.isEmpty()) {
// Look up (or create) the list of lines already seen for this name.
List<Integer> lines = duplicatedProperties.get(v);
if (lines == null) {
lines = new ArrayList<Integer>();
duplicatedProperties.put(v, lines);
// Only the presence of a "value" entry is checked in the visible code.
events = node.getXMLEventsForQName(new QName("value"));
if (events == null) {
errors.add(String.format("Line %d: <property> has no <value>", line));
// Names that occur more than once inside this property; "source" is exempt.
for (QName qName : node.getDuplicatedQNames()) {
if (!qName.equals(new QName("source"))) {
errors.add(String.format("Line %d: <property> has duplicated <%s>s",
line, qName));
// Report every property name that was seen on more than one line.
for (Entry<String, List<Integer>> e : duplicatedProperties.entrySet()) {
List<Integer> lines = e.getValue();
if (1 < lines.size()) {
errors.add(String.format("Line %s: duplicated <property>s for %s",
StringUtils.join(", ", lines), e.getKey()));
return errors;
private static File[] listFiles(File dir) {
return dir.listFiles(new FileFilter() {
public boolean accept(File file) {
return file.isFile() && file.getName().endsWith(".xml");
/**
 * Command-line entry point: parses the options, builds the list of files to
 * validate, runs checkConf() on each one and reports the outcome.
 *
 * NOTE(review): several statements of this method are not visible in this
 * view (e.g. what is done with each -conffile file, and the branch taken
 * when every file is valid).
 *
 * @param args raw command-line arguments
 * @throws IOException declared by the method; opening a FileInputStream for
 *         an input file is one visible source
 */
public static void main(String[] args) throws IOException {
// Let GenericOptionsParser handle its own options first; only the arguments
// it leaves over are parsed for -conffile / -h below.
GenericOptionsParser genericParser = new GenericOptionsParser(args);
String[] remainingArgs = genericParser.getRemainingArgs();
Option conf = OptionBuilder.hasArg().create("conffile");
Option help = OptionBuilder.withLongOpt("help").create('h');
Options opts = new Options().addOption(conf).addOption(help);
CommandLineParser specificParser = new GnuParser();
CommandLine cmd = null;
try {
cmd = specificParser.parse(opts, remainingArgs);
} catch (MissingArgumentException e) {
terminate(1, "No argument specified for -conffile option");
} catch (ParseException e) {
terminate(1, USAGE);
// cmd is still null if parse() threw before assigning it.
if (cmd == null) {
terminate(1, "Failed to parse options");
if (cmd.hasOption('h')) {
terminate(0, USAGE);
List<File> files = new ArrayList<File>();
if (cmd.hasOption("conffile")) {
// -conffile may be given several times; each value is a file or a directory.
String[] values = cmd.getOptionValues("conffile");
for (String value : values) {
File confFile = new File(value);
if (confFile.isFile()) {
} else if (confFile.isDirectory()) {
// For a directory, consider the *.xml files returned by listFiles().
for (File file : listFiles(confFile)) {
} else {
terminate(1, confFile.getAbsolutePath()
+ " is neither a file nor directory");
} else {
// No -conffile: fall back to the directory named by the environment variable.
String confDirName = System.getenv(HADOOP_CONF_DIR);
if (confDirName == null) {
// NOTE(review): message grammar — "does not defined" probably should read
// "is not defined"; left untouched because it is user-facing output.
terminate(1, HADOOP_CONF_DIR + " does not defined");
File confDir = new File(confDirName);
if (!confDir.isDirectory()) {
terminate(1, HADOOP_CONF_DIR + " is not a directory");
files = Arrays.asList(listFiles(confDir));
if (files.isEmpty()) {
terminate(1, "No input file to validate");
// Becomes false as soon as one file produces at least one error.
boolean ok = true;
for (File file : files) {
String path = file.getAbsolutePath();
// NOTE(review): the FileInputStream opened here is not closed in the code
// visible in this view — consider try/finally around checkConf().
List<String> errors = checkConf(new FileInputStream(file));
if (errors.isEmpty()) {
System.out.println(path + ": valid");
} else {
ok = false;
// Errors go to stderr: the path on its own line, then one tab-indented
// line per message.
System.err.println(path + ":");
for (String error : errors) {
System.err.println("\t" + error);
if (ok) {
} else {
terminate(1, "Invalid file exists");
// Invoked by main() with a status code (0 for help, 1 for failures in the
// visible calls) and a message for the user.
// NOTE(review): the body is not visible in this view; presumably it prints
// msg and exits the JVM with status — confirm.
private static void terminate(int status, String msg) {
// Per-element bookkeeping used while parsing: wraps a StartElement and keeps
// track of the attributes / child elements registered for it.
// NOTE(review): some statements of this class are not visible in this view
// (e.g. the additions to the lists created below).
class NodeInfo {
// The start tag this node was created for.
private StartElement startElement;
private List<Attribute> attributes = new ArrayList<Attribute>();
// Child start element -> text stored for it (null until text is set).
private Map<StartElement, Characters> elements =
new HashMap<StartElement, Characters>();
// Name -> every attribute or child-element event registered under that name.
private Map<QName, List<XMLEvent>> qNameXMLEventsMap =
new HashMap<QName, List<XMLEvent>>();
public NodeInfo(StartElement startElement) {
this.startElement = startElement;
// Fetches the event list for qName, creating and storing an empty list on
// first use. NOTE(review): the statement adding event to the list is not
// visible here.
private void addQNameXMLEvent(QName qName, XMLEvent event) {
List<XMLEvent> events = qNameXMLEventsMap.get(qName);
if (events == null) {
events = new ArrayList<XMLEvent>();
qNameXMLEventsMap.put(qName, events);
// Returns the start tag passed to the constructor.
public StartElement getStartElement() {
return startElement;
// Registers the attribute under its own name.
public void addAttribute(Attribute attribute) {
addQNameXMLEvent(attribute.getName(), attribute);
// Returns the text stored for the given child element, or null when none
// is stored (either no text was set or the element is unknown).
public Characters getElement(StartElement element) {
return elements.get(element);
// Registers a child element: no text yet, and an event under its name.
public void addElement(StartElement element) {
setElement(element, null);
addQNameXMLEvent(element.getName(), element);
// Stores (or overwrites) the text associated with a child element.
public void setElement(StartElement element, Characters text) {
elements.put(element, text);
// Scans for names that have more than one registered event.
// NOTE(review): the statement that adds such names to the result is not
// visible here.
public List<QName> getDuplicatedQNames() {
List<QName> duplicates = new ArrayList<QName>();
for (Map.Entry<QName, List<XMLEvent>> e : qNameXMLEventsMap.entrySet()) {
if (1 < e.getValue().size()) {
return duplicates;
// Returns the events registered under qName, or null when there are none.
public List<XMLEvent> getXMLEventsForQName(QName qName) {
return qNameXMLEventsMap.get(qName);

@ -0,0 +1,204 @@
package org.apache.hadoop.util;
import static org.junit.Assert.*;
import java.io.ByteArrayInputStream;
import java.util.List;
import org.junit.Test;
// Tests for ConfTest.checkConf(): each case feeds an in-memory XML string and
// inspects the returned error list.
// NOTE(review): annotations and some assertions are not visible in this view;
// where a test shows no assertion, its expectation is not stated here.
public class TestConfTest {
// Root element only, no properties. Assertions not visible in this view.
public void testEmptyConfiguration() {
String conf = "<configuration/>";
ByteArrayInputStream bais = new ByteArrayInputStream(conf.getBytes());
List<String> errors = ConfTest.checkConf(bais);
// One property with both name and value. Assertions not visible in this view.
public void testValidConfiguration() {
String conf = "<configuration>\n"
+ "<property>\n"
+ "<name>foo</name>\n"
+ "<value>bar</value>\n"
+ "</property>\n"
+ "</configuration>";
ByteArrayInputStream bais = new ByteArrayInputStream(conf.getBytes());
List<String> errors = ConfTest.checkConf(bais);
// "source" appears as an attribute and as two child elements.
// Assertions not visible in this view.
public void testSourceDuplicationIsValid() {
String conf = "<configuration>\n"
+ "<property source='a'>\n"
+ "<name>foo</name>\n"
+ "<value>bar</value>\n"
+ "<source>b</source>\n"
+ "<source>c</source>\n"
+ "</property>\n"
+ "</configuration>";
ByteArrayInputStream bais = new ByteArrayInputStream(conf.getBytes());
List<String> errors = ConfTest.checkConf(bais);
// Empty input must yield exactly one "bad conf file: ..." message.
public void testEmptyInput() {
String conf = "";
ByteArrayInputStream bais = new ByteArrayInputStream(conf.getBytes());
List<String> errors = ConfTest.checkConf(bais);
assertEquals(1, errors.size());
assertTrue(errors.get(0).startsWith("bad conf file: "));
// An unclosed root element must yield exactly one "bad conf file: ..." message.
public void testInvalidFormat() {
String conf = "<configuration>";
ByteArrayInputStream bais = new ByteArrayInputStream(conf.getBytes());
List<String> errors = ConfTest.checkConf(bais);
assertEquals(1, errors.size());
assertTrue(errors.get(0).startsWith("bad conf file: "));
// A root element with a different name is reported as a bad top-level element.
public void testRootElementNotConfiguration() {
String conf = "<configurations/>";
ByteArrayInputStream bais = new ByteArrayInputStream(conf.getBytes());
List<String> errors = ConfTest.checkConf(bais);
assertEquals(1, errors.size());
assertEquals("bad conf file: top-level element not <configuration>", errors.get(0));
// A child of the root that is not <property> is reported with its line number.
public void testSubElementNotProperty() {
String conf = "<configuration>\n"
+ "<foo/>\n"
+ "</configuration>";
ByteArrayInputStream bais = new ByteArrayInputStream(conf.getBytes());
List<String> errors = ConfTest.checkConf(bais);
assertEquals(1, errors.size());
assertEquals("Line 2: element not <property>", errors.get(0));
// A property with a value but no name is reported.
public void testPropertyHasNoName() {
String conf ="<configuration>\n"
+ "<property>\n"
+ "<value>foo</value>\n"
+ "</property>\n"
+ "</configuration>";
ByteArrayInputStream bais = new ByteArrayInputStream(conf.getBytes());
List<String> errors = ConfTest.checkConf(bais);
assertEquals(1, errors.size());
assertEquals("Line 2: <property> has no <name>", errors.get(0));
// A property whose <name> element has no text is reported.
public void testPropertyHasEmptyName() {
String conf = "<configuration>\n"
+ "<property>\n"
+ "<name></name>\n"
+ "<value>foo</value>\n"
+ "</property>\n"
+ "</configuration>";
ByteArrayInputStream bais = new ByteArrayInputStream(conf.getBytes());
List<String> errors = ConfTest.checkConf(bais);
assertEquals(1, errors.size());
assertEquals("Line 2: <property> has an empty <name>", errors.get(0));
// A property with a name but no value is reported.
public void testPropertyHasNoValue() {
String conf ="<configuration>\n"
+ "<property>\n"
+ "<name>foo</name>\n"
+ "</property>\n"
+ "</configuration>";
ByteArrayInputStream bais = new ByteArrayInputStream(conf.getBytes());
List<String> errors = ConfTest.checkConf(bais);
assertEquals(1, errors.size());
assertEquals("Line 2: <property> has no <value>", errors.get(0));
// A <value> element with no text. Assertions not visible in this view.
public void testPropertyHasEmptyValue() {
String conf = "<configuration>\n"
+ "<property>\n"
+ "<name>foo</name>\n"
+ "<value></value>\n"
+ "</property>\n"
+ "</configuration>";
ByteArrayInputStream bais = new ByteArrayInputStream(conf.getBytes());
List<String> errors = ConfTest.checkConf(bais);
// "name" given both as an attribute and as a child element counts as duplicated.
public void testPropertyHasDuplicatedAttributeAndElement() {
String conf = "<configuration>\n"
+ "<property name='foo'>\n"
+ "<name>bar</name>\n"
+ "<value>baz</value>\n"
+ "</property>\n"
+ "</configuration>";
ByteArrayInputStream bais = new ByteArrayInputStream(conf.getBytes());
List<String> errors = ConfTest.checkConf(bais);
assertEquals(1, errors.size());
assertEquals("Line 2: <property> has duplicated <name>s", errors.get(0));
// Two <name> child elements in one property count as duplicated.
public void testPropertyHasDuplicatedElements() {
String conf = "<configuration>\n"
+ "<property>\n"
+ "<name>foo</name>\n"
+ "<name>bar</name>\n"
+ "<value>baz</value>\n"
+ "</property>\n"
+ "</configuration>";
ByteArrayInputStream bais = new ByteArrayInputStream(conf.getBytes());
List<String> errors = ConfTest.checkConf(bais);
assertEquals(1, errors.size());
assertEquals("Line 2: <property> has duplicated <name>s", errors.get(0));
// Two properties sharing a name are reported once, listing both line numbers.
public void testDuplicatedProperties() {
String conf = "<configuration>\n"
+ "<property>\n"
+ "<name>foo</name>\n"
+ "<value>bar</value>\n"
+ "</property>\n"
+ "<property>\n"
+ "<name>foo</name>\n"
+ "<value>baz</value>\n"
+ "</property>\n"
+ "</configuration>";
ByteArrayInputStream bais = new ByteArrayInputStream(conf.getBytes());
List<String> errors = ConfTest.checkConf(bais);
assertEquals(1, errors.size());
assertEquals("Line 2, 6: duplicated <property>s for foo", errors.get(0));