#!/usr/bin/env python
'''
Merges Hadoop/HBase configuration files in the given order, so that options
specified in later configuration files override those specified in earlier
files.
'''

import os
import re
import sys
import textwrap

from optparse import OptionParser
from xml.dom.minidom import parse, getDOMImplementation


class MergeConfTool:
    '''
    Merges the given set of Hadoop/HBase configuration files, with later files
    overriding earlier ones.

    Instance state used across methods:
      input_files      -- list of input paths, set by parse_options()
      output_file      -- destination path, set by parse_options()
      current_file     -- path of the file being read (for error messages)
      current_property -- the <property> element being read
      merged_conf      -- the output DOM document, created by merge()
    '''

    INDENT = ' ' * 2

    # Description text is inside configuration, property, and description tags.
    DESC_INDENT = INDENT * 3

    def main(self):
        '''The main entry point for the configuration merge tool.'''
        self.parse_options()
        self.merge()

    def parse_options(self):
        '''
        Parses command-line options into self.input_files and
        self.output_file.

        Exits through fatal() if the output file is not given, if no input
        files are given, or if any input path is not an existing file.
        '''
        parser = OptionParser(
            usage='%prog <input_conf_files> -o <output_file>')
        parser.add_option('-o', '--output_file',
                          help='Destination configuration file')
        opts, input_files = parser.parse_args()
        if not opts.output_file:
            self.fatal('--output_file is not specified')
        if not input_files:
            self.fatal('No input files specified')
        for f_path in input_files:
            if not os.path.isfile(f_path):
                self.fatal('Input file %s does not exist' % f_path)
        self.input_files = input_files
        self.output_file = opts.output_file

    def merge(self):
        '''
        Merges input configuration files into the output file.

        Properties are written sorted by name. Each one carries a comment
        naming the input file that supplied its final value.
        '''
        values, source_files, descriptions = self._read_inputs()

        # Create the output configuration document
        dom_impl = getDOMImplementation()
        self.merged_conf = dom_impl.createDocument(None, 'configuration', None)
        root = self.merged_conf.documentElement
        for key in sorted(values):
            new_property = self.merged_conf.createElement('property')
            origin = self.merged_conf.createComment(
                'from ' + source_files[key])
            new_property.appendChild(origin)
            self.append_text_child(new_property, 'name', key)
            self.append_text_child(new_property, 'value', values[key])

            description = descriptions.get(key)
            if description:
                self.append_text_child(new_property, 'description',
                                       self._wrap_description(description))
            root.appendChild(new_property)

        pretty_conf = self._strip_name_value_padding(
            self.merged_conf.toprettyxml(indent=self.INDENT))

        out_f = open(self.output_file, 'w')
        try:
            out_f.write(pretty_conf)
        finally:
            out_f.close()

    def _read_inputs(self):
        '''Reads all input files in order and returns the merged maps.'''
        values = {}  # Conf key to value
        source_files = {}  # Conf key to the file name the value came from
        descriptions = {}  # Conf key to description (optional)

        for f_path in self.input_files:
            self.current_file = f_path
            f_basename = os.path.basename(f_path)
            f_dom = parse(f_path)
            for prop in f_dom.getElementsByTagName('property'):
                self.current_property = prop
                name = self.element_text('name')
                values[name] = self.element_text('value')
                source_files[name] = f_basename

                # A description is only replaced when the overriding property
                # has one of its own; otherwise the earlier one is kept.
                if prop.getElementsByTagName('description'):
                    descriptions[name] = self.element_text('description')
        return values, source_files, descriptions

    def _wrap_description(self, description):
        '''Collapses whitespace in a description and wraps it to 80 columns.'''
        description = ' '.join(description.split())
        textwrap_kwargs = {}
        if sys.version_info >= (2, 6):
            # textwrap only accepts break_on_hyphens from Python 2.6 on.
            textwrap_kwargs = dict(break_on_hyphens=False)
        lines = textwrap.wrap(description, 80 - len(self.DESC_INDENT),
                              break_long_words=False, **textwrap_kwargs)
        return ('\n' + self.DESC_INDENT).join(lines)

    def _strip_name_value_padding(self, xml_text):
        '''Removes whitespace just inside <name> and <value> elements.'''
        # Remove space before and after names and values. This way we don't
        # have to worry about leading and trailing whitespace creeping in.
        # One substitution per tag: look-behind patterns must be fixed-width,
        # so '<name>' and '<value>' cannot share a single alternation.
        for tag in ('name', 'value'):
            xml_text = re.sub(r'(?<=<%s>)\s*' % tag, '', xml_text)
            xml_text = re.sub(r'\s*(?=</%s>)' % tag, '', xml_text)
        return xml_text

    def element_text(self, tag_name):
        '''
        Returns the stripped text of the only tag_name element inside the
        current property.
        '''
        return self.whole_text(self.only_element(tag_name))

    def fatal(self, msg):
        '''Writes msg to standard error and exits with status 1.'''
        # sys.stderr.write works on both Python 2 and 3, unlike the Python 2
        # print-chevron statement.
        sys.stderr.write('%s\n' % msg)
        sys.exit(1)

    def only_element(self, tag_name):
        '''
        Returns the single tag_name element inside the current property.

        Exits through fatal() when there are zero or several such elements.
        '''
        matches = self.current_property.getElementsByTagName(tag_name)
        if len(matches) != 1:
            self.fatal('Invalid property in %s, only one '
                       '"%s" element expected: %s' % (
                           self.current_file, tag_name,
                           self.current_property.toxml()))
        return matches[0]

    def whole_text(self, element):
        '''
        Returns the stripped text content of element, or '' if it is empty.

        Exits through fatal() when the element has more than one child or
        when its only child is not text.
        '''
        children = element.childNodes
        if len(children) > 1:
            self.fatal('No more than one child expected in %s: %s' % (
                self.current_file, element.toxml()))
        if len(children) == 1:
            child = children[0]
            # Only text-like nodes expose wholeText; report anything else
            # as an input error instead of failing with AttributeError.
            if child.nodeType not in (child.TEXT_NODE,
                                      child.CDATA_SECTION_NODE):
                self.fatal('Only text content expected in %s: %s' % (
                    self.current_file, element.toxml()))
            return child.wholeText.strip()
        return ''

    def append_text_child(self, property_element, tag_name, value):
        '''
        Appends a tag_name element containing the text value to
        property_element.
        '''
        element = self.merged_conf.createElement(tag_name)
        element.appendChild(self.merged_conf.createTextNode(value))
        property_element.appendChild(element)


if __name__ == '__main__':
    MergeConfTool().main()