[jira] [HBASE-4867] A tool to merge configuration files

Summary: With our cluster configuration setup it would be good to have a tool
that would merge HBase configuration files so that files appearing later in the
list would override properties specified in earlier files. This way we could
merge an application-specific configuration file with a cluster-specific
configuration file (with the latter overriding the former) and produce a single
HBase configuration file to install on the cluster.

Test Plan: Run the tool on two configuration files (common and
cluster-specific). Use the resulting configuration on a dev cluster.

Reviewers: todd, Karthik, tedyu, stack, JIRA

Reviewed By: Karthik

CC: Karthik, mbautin, todd

Differential Revision: 537

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1208832 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
karthik 2011-11-30 22:12:22 +00:00
parent 54e463f519
commit a9ca67a2c5
1 changed files with 138 additions and 0 deletions

View File

@@ -0,0 +1,138 @@
#!/usr/bin/env python
'''
Merges Hadoop/HBase configuration files in the given order, so that options
specified in later configuration files override those specified in earlier
files.
'''
import os
import re
import sys
import textwrap
from optparse import OptionParser
from xml.dom.minidom import parse, getDOMImplementation
class MergeConfTool:
    '''
    Merges the given set of Hadoop/HBase configuration files, with later files
    overriding earlier ones.

    Usage: MergeConfTool().main() reads the input files and the output file
    name from the command line, merges all <property> elements keyed by their
    <name>, and writes a single configuration file with properties sorted by
    name. Every fatal condition is reported through fatal(), which prints the
    message to stderr and exits with status 1.
    '''

    INDENT = ' ' * 2
    # Description text is inside configuration, property, and description tags.
    DESC_INDENT = INDENT * 3

    def main(self):
        '''The main entry point for the configuration merge tool.'''
        self.parse_options()
        self.merge()

    def parse_options(self):
        '''
        Parses command-line options.

        Sets self.input_files (positional arguments, in the given order) and
        self.output_file (-o / --output_file). Exits through fatal() when the
        output file is not specified, when no input files are given, or when
        an input path is not an existing regular file.
        '''
        parser = OptionParser(
            usage='%prog <input_conf_files> -o <output_file>')
        parser.add_option('-o', '--output_file',
                          help='Destination configuration file')
        opts, input_files = parser.parse_args()
        if not opts.output_file:
            self.fatal('--output_file is not specified')
        if not input_files:
            self.fatal('No input files specified')
        for f_path in input_files:
            if not os.path.isfile(f_path):
                self.fatal('Input file %s does not exist' % f_path)
        self.input_files = input_files
        self.output_file = opts.output_file

    def merge(self):
        '''
        Merges input configuration files into the output file.

        Reads self.input_files in order, builds the merged document in
        self.merged_conf and writes it to self.output_file.
        '''
        values, source_files, descriptions = self._read_properties()
        self._build_document(values, source_files, descriptions)
        self._write_document()

    def _read_properties(self):
        '''
        Reads all input files in order and collects their properties.

        Returns a tuple of three dicts keyed by property name: the value, the
        base name of the file the value came from, and the description. A
        later file replaces the value and source of an earlier one; the
        description is replaced only when the later property has one.
        '''
        values = {}  # Conf key to values
        source_files = {}  # Conf key to the file name the value came from
        descriptions = {}  # Conf key to description (optional)
        for f_path in self.input_files:
            # element_text() and its helpers read these two attributes.
            self.current_file = f_path
            f_basename = os.path.basename(f_path)
            for prop in parse(f_path).getElementsByTagName('property'):
                self.current_property = prop
                name = self.element_text('name')
                values[name] = self.element_text('value')
                source_files[name] = f_basename
                if prop.getElementsByTagName('description'):
                    descriptions[name] = self.element_text('description')
        return values, source_files, descriptions

    def _build_document(self, values, source_files, descriptions):
        '''
        Creates self.merged_conf, a <configuration> document holding one
        <property> per key in sorted key order. Each property starts with a
        comment naming the source file, followed by name, value and, when
        non-empty, the re-wrapped description.
        '''
        self.merged_conf = getDOMImplementation().createDocument(
            None, 'configuration', None)
        root = self.merged_conf.documentElement
        for key in sorted(values):
            new_property = self.merged_conf.createElement('property')
            new_property.appendChild(
                self.merged_conf.createComment('from ' + source_files[key]))
            self.append_text_child(new_property, 'name', key)
            self.append_text_child(new_property, 'value', values[key])
            description = descriptions.get(key)
            if description:
                self.append_text_child(new_property, 'description',
                                       self._wrap_description(description))
            root.appendChild(new_property)

    def _wrap_description(self, description):
        '''
        Collapses all whitespace runs in the description to single spaces and
        re-wraps it so that lines, once indented by DESC_INDENT, fit in 80
        columns. Continuation lines are prefixed with DESC_INDENT.
        '''
        description = ' '.join(description.strip().split())
        textwrap_kwargs = {}
        # break_on_hyphens is only accepted by textwrap from Python 2.6 on.
        if sys.version_info >= (2, 6):
            textwrap_kwargs = dict(break_on_hyphens=False)
        lines = textwrap.wrap(description, 80 - len(self.DESC_INDENT),
                              break_long_words=False, **textwrap_kwargs)
        return ('\n' + self.DESC_INDENT).join(lines)

    def _write_document(self):
        '''Serializes self.merged_conf and writes it to self.output_file.'''
        pretty_conf = self.merged_conf.toprettyxml(indent=self.INDENT)
        # Remove space before and after names and values. This way we don't
        # have to worry about leading and trailing whitespace creeping in.
        for tag_name in ('name', 'value'):
            pretty_conf = re.sub(r'(?<=<%s>)\s*' % tag_name, '', pretty_conf)
            pretty_conf = re.sub(r'\s*(?=</%s>)' % tag_name, '', pretty_conf)
        # toprettyxml() without an encoding argument emits an XML declaration
        # with no encoding attribute, which XML readers interpret as UTF-8.
        # Write UTF-8 bytes explicitly instead of relying on the platform's
        # default text encoding, so non-ASCII values stay readable.
        # try/finally rather than "with" keeps Python 2.5 compatibility.
        out_f = open(self.output_file, 'wb')
        try:
            out_f.write(pretty_conf.encode('utf-8'))
        finally:
            out_f.close()

    def element_text(self, tag_name):
        '''
        Returns the stripped text of the only tag_name element inside
        self.current_property; exits through fatal() if that element is
        missing, duplicated, or has unexpected content.
        '''
        return self.whole_text(self.only_element(tag_name))

    def fatal(self, msg):
        '''Prints msg to stderr and terminates the process with status 1.'''
        # sys.stderr.write() behaves the same on Python 2 and Python 3; the
        # "print >> sys.stderr" statement form fails on Python 3.
        sys.stderr.write('%s\n' % (msg,))
        sys.exit(1)

    def only_element(self, tag_name):
        '''
        Returns the single descendant of self.current_property with the given
        tag name. Exits through fatal() unless there is exactly one.
        '''
        matches = self.current_property.getElementsByTagName(tag_name)
        if len(matches) != 1:
            self.fatal('Invalid property in %s, only one '
                       '"%s" element expected: %s' % (
                           self.current_file, tag_name,
                           self.current_property.toxml()))
        return matches[0]

    def whole_text(self, element):
        '''
        Returns the stripped text content of element, or '' if it has no
        children. Exits through fatal() if the element has more than one
        child node or if its only child is not a text or CDATA node.
        '''
        children = element.childNodes
        if len(children) > 1:
            self.fatal('No more than one child expected in %s: %s' % (
                self.current_file, element.toxml()))
        if len(children) == 1:
            child = children[0]
            # Only text-like nodes provide wholeText; report anything else
            # (nested element, comment, ...) as an input error.
            if child.nodeType not in (child.TEXT_NODE,
                                      child.CDATA_SECTION_NODE):
                self.fatal('Only text content expected in %s: %s' % (
                    self.current_file, element.toxml()))
            return child.wholeText.strip()
        return ''

    def append_text_child(self, property_element, tag_name, value):
        '''
        Appends a <tag_name> element containing value as text to
        property_element. Requires self.merged_conf to be the target document.
        '''
        element = self.merged_conf.createElement(tag_name)
        element.appendChild(self.merged_conf.createTextNode(value))
        property_element.appendChild(element)
if __name__ == '__main__':
    # Run the merge only when executed as a script, not when imported.
    merge_tool = MergeConfTool()
    merge_tool.main()