From f34235a5df58bcafed712037763f9ad49d6bf4d2 Mon Sep 17 00:00:00 2001 From: David Goodger Date: Wed, 1 Jan 2003 02:36:01 +0000 Subject: [PATCH] update from latest Docutils code --- docutils/.cvsignore | 1 - docutils/core.py | 9 +- docutils/frontend.py | 35 +- docutils/io.py | 66 +- docutils/languages/.cvsignore | 1 - docutils/languages/it.py | 60 ++ docutils/parsers/.cvsignore | 1 - docutils/parsers/rst/.cvsignore | 1 - docutils/parsers/rst/__init__.py | 33 +- docutils/parsers/rst/directives/.cvsignore | 1 - docutils/parsers/rst/directives/misc.py | 11 +- docutils/parsers/rst/languages/.cvsignore | 1 - docutils/parsers/rst/languages/it.py | 46 ++ docutils/parsers/rst/states.py | 29 +- docutils/readers/.cvsignore | 1 - docutils/readers/__init__.py | 3 +- docutils/readers/pep.py | 22 +- docutils/readers/python/__init__.py | 19 + docutils/readers/python/moduleparser.py | 780 +++++++++++++++++++++ docutils/transforms/.cvsignore | 1 - docutils/transforms/misc.py | 33 + docutils/transforms/peps.py | 23 +- docutils/utils.py | 9 + docutils/writers/.cvsignore | 1 - docutils/writers/docutils_xml.py | 22 +- docutils/writers/html4css1.py | 10 +- 26 files changed, 1105 insertions(+), 114 deletions(-) delete mode 100644 docutils/.cvsignore delete mode 100644 docutils/languages/.cvsignore create mode 100644 docutils/languages/it.py delete mode 100644 docutils/parsers/.cvsignore delete mode 100644 docutils/parsers/rst/.cvsignore delete mode 100644 docutils/parsers/rst/directives/.cvsignore delete mode 100644 docutils/parsers/rst/languages/.cvsignore create mode 100644 docutils/parsers/rst/languages/it.py delete mode 100644 docutils/readers/.cvsignore create mode 100644 docutils/readers/python/__init__.py create mode 100644 docutils/readers/python/moduleparser.py delete mode 100644 docutils/transforms/.cvsignore create mode 100644 docutils/transforms/misc.py delete mode 100644 docutils/writers/.cvsignore diff --git a/docutils/.cvsignore b/docutils/.cvsignore deleted file mode 
100644 index 0d20b6487..000000000 --- a/docutils/.cvsignore +++ /dev/null @@ -1 +0,0 @@ -*.pyc diff --git a/docutils/core.py b/docutils/core.py index fa99df483..f73ccf338 100644 --- a/docutils/core.py +++ b/docutils/core.py @@ -137,8 +137,9 @@ class Publisher: source_path = self.settings._source else: self.settings._source = source_path - self.source = self.source_class(self.settings, source=source, - source_path=source_path) + self.source = self.source_class( + source=source, source_path=source_path, + encoding=self.settings.input_encoding) def set_destination(self, destination=None, destination_path=None): if destination_path is None: @@ -146,8 +147,8 @@ class Publisher: else: self.settings._destination = destination_path self.destination = self.destination_class( - self.settings, destination=destination, - destination_path=destination_path) + destination=destination, destination_path=destination_path, + encoding=self.settings.output_encoding) def apply_transforms(self, document): document.transformer.populate_from_components( diff --git a/docutils/frontend.py b/docutils/frontend.py index 3c13f094e..029f2763a 100644 --- a/docutils/frontend.py +++ b/docutils/frontend.py @@ -303,33 +303,8 @@ class ConfigParser(CP.ConfigParser): The section DEFAULT is special. 
""" - try: - sectdict = self._ConfigParser__sections[section].copy() - except KeyError: - sectdict = {} - d = self._ConfigParser__defaults.copy() - d.update(sectdict) - # Update with the entry specific variables - if vars: - d.update(vars) - if raw: - return sectdict - # do the string interpolation - for option in sectdict.keys(): - rawval = sectdict[option] - value = rawval # Make it a pretty variable name - depth = 0 - while depth < 10: # Loop through this until it's done - depth += 1 - if value.find("%(") >= 0: - try: - value = value % d - except KeyError, key: - raise CP.InterpolationError(key, option, section, - rawval) - else: - break - if value.find("%(") >= 0: - raise CP.InterpolationDepthError(option, section, rawval) - sectdict[option] = value - return sectdict + section_dict = {} + if self.has_section(section): + for option in self.options(section): + section_dict[option] = self.get(section, option, raw, vars) + return section_dict diff --git a/docutils/io.py b/docutils/io.py index de8af4582..ba624f1e7 100644 --- a/docutils/io.py +++ b/docutils/io.py @@ -26,10 +26,20 @@ class Input(TransformSpec): default_source_path = None - def __init__(self, settings, source=None, source_path=None): - self.settings = settings - """A settings object with "input_encoding" and "output_encoding" - attributes (typically a `docutils.optik.Values` object).""" + def __init__(self, settings=None, source=None, source_path=None, + encoding=None): + self.encoding = encoding + """The character encoding for the input source.""" + + if settings: + if not encoding: + self.encoding = settings.input_encoding + import warnings, traceback + warnings.warn( + 'Setting input encoding via a "settings" struct is ' + 'deprecated; send encoding directly instead.\n%s' + % ''.join(traceback.format_list(traceback.extract_stack() + [-3:-1]))) self.source = source """The source of input data.""" @@ -44,7 +54,7 @@ class Input(TransformSpec): return '%s: source=%r, source_path=%r' % (self.__class__, 
self.source, self.source_path) - def read(self, reader): + def read(self): raise NotImplementedError def decode(self, data): @@ -57,10 +67,9 @@ class Input(TransformSpec): locale.setlocale(locale.LC_ALL, '') """ - if self.settings.input_encoding \ - and self.settings.input_encoding.lower() == 'unicode': + if self.encoding and self.encoding.lower() == 'unicode': return unicode(data) - encodings = [self.settings.input_encoding, 'utf-8'] + encodings = [self.encoding, 'utf-8'] try: encodings.append(locale.nl_langinfo(locale.CODESET)) except: @@ -97,10 +106,20 @@ class Output(TransformSpec): default_destination_path = None - def __init__(self, settings, destination=None, destination_path=None): - self.settings = settings - """A settings object with "input_encoding" and "output_encoding" - attributes (typically a `docutils.optik.Values` object).""" + def __init__(self, settings=None, destination=None, destination_path=None, + encoding=None): + self.encoding = encoding + """The character encoding for the output destination.""" + + if settings: + if not encoding: + self.encoding = settings.output_encoding + import warnings, traceback + warnings.warn( + 'Setting output encoding via a "settings" struct is ' + 'deprecated; send encoding directly instead.\n%s' + % ''.join(traceback.format_list(traceback.extract_stack() + [-3:-1]))) self.destination = destination """The destination for output data.""" @@ -119,11 +138,10 @@ class Output(TransformSpec): raise NotImplementedError def encode(self, data): - if self.settings.output_encoding \ - and self.settings.output_encoding.lower() == 'unicode': + if self.encoding and self.encoding.lower() == 'unicode': return data else: - return data.encode(self.settings.output_encoding or '') + return data.encode(self.encoding or '') class FileInput(Input): @@ -132,7 +150,8 @@ class FileInput(Input): Input for single, simple file-like objects. 
""" - def __init__(self, settings, source=None, source_path=None, autoclose=1): + def __init__(self, settings=None, source=None, source_path=None, + encoding=None, autoclose=1): """ :Parameters: - `source`: either a file-like object (which is read directly), or @@ -141,7 +160,7 @@ class FileInput(Input): - `autoclose`: close automatically after read (boolean); always false if `sys.stdin` is the source. """ - Input.__init__(self, settings, source, source_path) + Input.__init__(self, settings, source, source_path, encoding) self.autoclose = autoclose if source is None: if source_path: @@ -155,7 +174,7 @@ class FileInput(Input): except AttributeError: pass - def read(self, reader): + def read(self): """Read and decode a single file and return the data.""" data = self.source.read() if self.autoclose: @@ -172,8 +191,8 @@ class FileOutput(Output): Output for single, simple file-like objects. """ - def __init__(self, settings, destination=None, destination_path=None, - autoclose=1): + def __init__(self, settings=None, destination=None, destination_path=None, + encoding=None, autoclose=1): """ :Parameters: - `destination`: either a file-like object (which is written @@ -184,7 +203,8 @@ class FileOutput(Output): - `autoclose`: close automatically after write (boolean); always false if `sys.stdout` is the destination. 
""" - Output.__init__(self, settings, destination, destination_path) + Output.__init__(self, settings, destination, destination_path, + encoding) self.opened = 1 self.autoclose = autoclose if destination is None: @@ -226,7 +246,7 @@ class StringInput(Input): default_source_path = '' - def read(self, reader): + def read(self): """Decode and return the source string.""" return self.decode(self.source) @@ -253,7 +273,7 @@ class NullInput(Input): default_source_path = 'null input' - def read(self, reader): + def read(self): """Return a null string.""" return u'' diff --git a/docutils/languages/.cvsignore b/docutils/languages/.cvsignore deleted file mode 100644 index 0d20b6487..000000000 --- a/docutils/languages/.cvsignore +++ /dev/null @@ -1 +0,0 @@ -*.pyc diff --git a/docutils/languages/it.py b/docutils/languages/it.py new file mode 100644 index 000000000..101abeb96 --- /dev/null +++ b/docutils/languages/it.py @@ -0,0 +1,60 @@ +# Author: Nicola Larosa +# Contact: docutils@tekNico.net +# Revision: $Revision$ +# Date: $Date$ +# Copyright: This module has been placed in the public domain. + +""" +Italian-language mappings for language-dependent features of Docutils. 
+""" + +__docformat__ = 'reStructuredText' + + +from docutils import nodes + + +labels = { + 'author': 'Autore', + 'authors': 'Autori', + 'organization': 'Organizzazione', + 'address': 'Indirizzo', + 'contact': 'Contatti', + 'version': 'Versione', + 'revision': 'Revisione', + 'status': 'Status', + 'date': 'Data', + 'copyright': 'Copyright', + 'dedication': 'Dedica', + 'abstract': 'Riassunto', + 'attention': 'Attenzione!', + 'caution': 'Cautela!', + 'danger': '!PERICOLO!', + 'error': 'Errore', + 'hint': 'Suggerimento', + 'important': 'Importante', + 'note': 'Nota', + 'tip': 'Consiglio', + 'warning': 'Avvertenza', + 'contents': 'Indice'} +"""Mapping of node class name to label text.""" + +bibliographic_fields = { + 'autore': nodes.author, + 'autori': nodes.authors, + 'organizzazione': nodes.organization, + 'indirizzo': nodes.address, + 'contatti': nodes.contact, + 'versione': nodes.version, + 'revisione': nodes.revision, + 'status': nodes.status, + 'data': nodes.date, + 'copyright': nodes.copyright, + 'dedica': nodes.topic, + 'riassunto': nodes.topic} +"""Field name (lowcased) to node class name mapping for bibliographic fields +(field_list).""" + +author_separators = [';', ','] +"""List of separator strings for the 'Authors' bibliographic field. Tried in +order.""" diff --git a/docutils/parsers/.cvsignore b/docutils/parsers/.cvsignore deleted file mode 100644 index 0d20b6487..000000000 --- a/docutils/parsers/.cvsignore +++ /dev/null @@ -1 +0,0 @@ -*.pyc diff --git a/docutils/parsers/rst/.cvsignore b/docutils/parsers/rst/.cvsignore deleted file mode 100644 index 0d20b6487..000000000 --- a/docutils/parsers/rst/.cvsignore +++ /dev/null @@ -1 +0,0 @@ -*.pyc diff --git a/docutils/parsers/rst/__init__.py b/docutils/parsers/rst/__init__.py index ad60a319d..b242ad9cb 100644 --- a/docutils/parsers/rst/__init__.py +++ b/docutils/parsers/rst/__init__.py @@ -8,15 +8,16 @@ This is ``docutils.parsers.rst`` package. It exports a single class, `Parser`, the reStructuredText parser. 
+ Usage ===== 1. Create a parser:: - parser = docutils.parsers.restructuredtext.Parser() + parser = docutils.parsers.rst.Parser() Several optional arguments may be passed to modify the parser's behavior. - Please see `docutils.parsers.Parser` for details. + Please see `Customizing the Parser`_ below for details. 2. Gather input (a multi-line string), by reading a file or the standard input:: @@ -25,7 +26,7 @@ Usage 3. Create a new empty `docutils.nodes.document` tree:: - document = docutils.utils.new_document(source) + document = docutils.utils.new_document(source, settings) See `docutils.utils.new_document()` for parameter details. @@ -33,6 +34,7 @@ Usage parser.parse(input, document) + Parser Overview =============== @@ -40,6 +42,31 @@ The reStructuredText parser is implemented as a state machine, examining its input one line at a time. To understand how the parser works, please first become familiar with the `docutils.statemachine` module, then see the `states` module. + + +Customizing the Parser +---------------------- + +Anything that isn't already customizable is that way simply because that type +of customizability hasn't been implemented yet. Patches welcome! + +When instantiating an object of the `Parser` class, two parameters may be +passed: ``rfc2822`` and ``inliner``. Pass ``rfc2822=1`` to enable an initial +RFC-2822 style header block, parsed as a "field_list" element (with "class" +attribute set to "rfc2822"). Currently this is the only body-level element +which is customizable without subclassing. (Tip: subclass `Parser` and change +its "state_classes" and "initial_state" attributes to refer to new classes. +Contact the author if you need more details.) + +The ``inliner`` parameter takes an instance of `states.Inliner` or a subclass. +It handles inline markup recognition. A common extension is the addition of +further implicit hyperlinks, like "RFC 2822". 
This can be done by subclassing +`states.Inliner`, adding a new method for the implicit markup, and adding a +``(pattern, method)`` pair to the "implicit_dispatch" attribute of the +subclass. See `states.Inliner.implicit_inline()` for details. Explicit +inline markup can be customized in a `states.Inliner` subclass via the +``patterns.initial`` and ``dispatch`` attributes (and new methods as +appropriate). """ __docformat__ = 'reStructuredText' diff --git a/docutils/parsers/rst/directives/.cvsignore b/docutils/parsers/rst/directives/.cvsignore deleted file mode 100644 index 0d20b6487..000000000 --- a/docutils/parsers/rst/directives/.cvsignore +++ /dev/null @@ -1 +0,0 @@ -*.pyc diff --git a/docutils/parsers/rst/directives/misc.py b/docutils/parsers/rst/directives/misc.py index 3ff9eb93f..cc8019886 100644 --- a/docutils/parsers/rst/directives/misc.py +++ b/docutils/parsers/rst/directives/misc.py @@ -11,15 +11,16 @@ __docformat__ = 'reStructuredText' import sys import os.path from urllib2 import urlopen, URLError -from docutils import nodes, statemachine, utils +from docutils import io, nodes, statemachine, utils from docutils.parsers.rst import directives, states def include(name, arguments, options, content, lineno, content_offset, block_text, state, state_machine): """Include a reST file as part of the content of this reST file.""" - source_dir = os.path.dirname( - os.path.abspath(state.document.current_source)) + source = state_machine.input_lines.source( + lineno - state_machine.input_offset - 1) + source_dir = os.path.dirname(os.path.abspath(source)) path = ''.join(arguments[0].splitlines()) if path.find(' ') != -1: error = state_machine.reporter.error( @@ -29,14 +30,14 @@ def include(name, arguments, options, content, lineno, path = os.path.normpath(os.path.join(source_dir, path)) path = utils.relative_path(None, path) try: - include_file = open(path) + include_file = io.FileInput( + source_path=path, encoding=state.document.settings.input_encoding) except 
IOError, error: severe = state_machine.reporter.severe( 'Problems with "%s" directive path:\n%s.' % (name, error), nodes.literal_block(block_text, block_text), line=lineno) return [severe] include_text = include_file.read() - include_file.close() if options.has_key('literal'): literal_block = nodes.literal_block(include_text, include_text, source=path) diff --git a/docutils/parsers/rst/languages/.cvsignore b/docutils/parsers/rst/languages/.cvsignore deleted file mode 100644 index 0d20b6487..000000000 --- a/docutils/parsers/rst/languages/.cvsignore +++ /dev/null @@ -1 +0,0 @@ -*.pyc diff --git a/docutils/parsers/rst/languages/it.py b/docutils/parsers/rst/languages/it.py new file mode 100644 index 000000000..fe7610eb0 --- /dev/null +++ b/docutils/parsers/rst/languages/it.py @@ -0,0 +1,46 @@ +# Author: Nicola Larosa +# Contact: docutils@tekNico.net +# Revision: $Revision$ +# Date: $Date$ +# Copyright: This module has been placed in the public domain. + +""" +Italian-language mappings for language-dependent features of +reStructuredText. 
+""" + +__docformat__ = 'reStructuredText' + + +directives = { + 'attenzione': 'attention', + 'cautela': 'caution', + 'pericolo': 'danger', + 'errore': 'error', + 'suggerimento': 'hint', + 'importante': 'important', + 'nota': 'note', + 'consiglio': 'tip', + 'avvertenza': 'warning', + 'argomento': 'topic', + 'blocco di linee': 'line-block', + 'parsed-literal': 'parsed-literal', + #'questions': 'questions', + #'qa': 'questions', + #'faq': 'questions', + 'meta': 'meta', + #'imagemap': 'imagemap', + 'immagine': 'image', + 'figura': 'figure', + 'includi': 'include', + 'grezzo': 'raw', + 'sostituisci': 'replace', + 'indice': 'contents', + 'seznum': 'sectnum', + 'section-numbering': 'sectnum', + 'target-notes': 'target-notes', + #'footnotes': 'footnotes', + #'citations': 'citations', + 'restructuredtext-test-directive': 'restructuredtext-test-directive'} +"""English name to registered (in directives/__init__.py) directive name +mapping.""" diff --git a/docutils/parsers/rst/states.py b/docutils/parsers/rst/states.py index 3f49a9954..fb95cdd6a 100644 --- a/docutils/parsers/rst/states.py +++ b/docutils/parsers/rst/states.py @@ -528,7 +528,7 @@ class Inliner: openers = '\'"([{<' closers = '\'")]}>' start_string_prefix = (r'((?<=^)|(?<=[-/: \n%s]))' % re.escape(openers)) - end_string_suffix = (r'((?=$)|(?=[-/:.,;!? \n%s]))' % re.escape(closers)) + end_string_suffix = (r'((?=$)|(?=[-/:.,;!? \n\x00%s]))' % re.escape(closers)) non_whitespace_before = r'(? 
len(underline): if len(underline) < 4: - msg = self.reporter.info( - 'Possible title underline, too short for the title.\n' - "Treating it as ordinary text because it's so short.", - line=lineno) - self.parent += msg + if self.state_machine.match_titles: + msg = self.reporter.info( + 'Possible title underline, too short for the title.\n' + "Treating it as ordinary text because it's so short.", + line=lineno) + self.parent += msg raise statemachine.TransitionCorrection('text') else: blocktext = context[0] + '\n' + self.state_machine.line @@ -2408,6 +2401,14 @@ class Text(RSTState): 'Title underline too short.', nodes.literal_block(blocktext, blocktext), line=lineno) messages.append(msg) + if not self.state_machine.match_titles: + blocktext = context[0] + '\n' + self.state_machine.line + msg = self.reporter.severe( + 'Unexpected section title.', + nodes.literal_block(blocktext, blocktext), line=lineno) + self.parent += messages + self.parent += msg + return [], next_state, [] style = underline[0] context[:] = [] self.section(title, source, style, lineno - 1, messages) diff --git a/docutils/readers/.cvsignore b/docutils/readers/.cvsignore deleted file mode 100644 index 0d20b6487..000000000 --- a/docutils/readers/.cvsignore +++ /dev/null @@ -1 +0,0 @@ -*.pyc diff --git a/docutils/readers/__init__.py b/docutils/readers/__init__.py index 70960b210..f12401ff0 100644 --- a/docutils/readers/__init__.py +++ b/docutils/readers/__init__.py @@ -61,8 +61,7 @@ class Reader(Component): if not self.parser: self.parser = parser self.settings = settings - # May modify self.parser, depending on input: - self.input = self.source.read(self) + self.input = self.source.read() self.parse() return self.document diff --git a/docutils/readers/pep.py b/docutils/readers/pep.py index 38d09f8c7..7b75b94e8 100644 --- a/docutils/readers/pep.py +++ b/docutils/readers/pep.py @@ -20,6 +20,15 @@ from docutils.transforms import peps, references from docutils.parsers import rst +class 
Inliner(rst.states.Inliner): + + """ + Extend `rst.Inliner` to for local PEP references. + """ + + pep_url = rst.states.Inliner.pep_url_local + + class Reader(standalone.Reader): supported = ('pep',) @@ -44,17 +53,10 @@ class Reader(standalone.Reader): settings_default_overrides = {'pep_references': 1, 'rfc_references': 1} + inliner_class = Inliner + def __init__(self, parser=None, parser_name=None): """`parser` should be ``None``.""" if parser is None: - parser = rst.Parser(rfc2822=1, inliner=Inliner()) + parser = rst.Parser(rfc2822=1, inliner=self.inliner_class()) standalone.Reader.__init__(self, parser, '') - - -class Inliner(rst.states.Inliner): - - """ - Extend `rst.Inliner` to for local PEP references. - """ - - pep_url = rst.states.Inliner.pep_url_local diff --git a/docutils/readers/python/__init__.py b/docutils/readers/python/__init__.py new file mode 100644 index 000000000..a346ce32a --- /dev/null +++ b/docutils/readers/python/__init__.py @@ -0,0 +1,19 @@ +# Author: David Goodger +# Contact: goodger@users.sourceforge.net +# Revision: $Revision$ +# Date: $Date$ +# Copyright: This module has been placed in the public domain. + +""" +This package contains the Python Source Reader modules. +""" + +__docformat__ = 'reStructuredText' + + +import sys +import docutils.readers + + +class Reader(docutils.readers.Reader): + pass diff --git a/docutils/readers/python/moduleparser.py b/docutils/readers/python/moduleparser.py new file mode 100644 index 000000000..2262ddbd5 --- /dev/null +++ b/docutils/readers/python/moduleparser.py @@ -0,0 +1,780 @@ +# Author: David Goodger +# Contact: goodger@users.sourceforge.net +# Revision: $Revision$ +# Date: $Date$ +# Copyright: This module has been placed in the public domain. + +""" +Parser for Python modules. 
+ +The `parse_module()` function takes a module's text and file name, runs it +through the module parser (using compiler.py and tokenize.py) and produces a +"module documentation tree": a high-level AST full of nodes that are +interesting from an auto-documentation standpoint. For example, given this +module (x.py):: + + # comment + + '''Docstring''' + + '''Additional docstring''' + + __docformat__ = 'reStructuredText' + + a = 1 + '''Attribute docstring''' + + class C(Super): + + '''C's docstring''' + + class_attribute = 1 + '''class_attribute's docstring''' + + def __init__(self, text=None): + '''__init__'s docstring''' + + self.instance_attribute = (text * 7 + + ' whaddyaknow') + '''instance_attribute's docstring''' + + + def f(x, # parameter x + y=a*5, # parameter y + *args): # parameter args + '''f's docstring''' + return [x + item for item in args] + + f.function_attribute = 1 + '''f.function_attribute's docstring''' + +The module parser will produce this module documentation tree:: + + + + comment + + Docstring + + Additional docstring + + + 'reStructuredText' + + + 1 + + Attribute docstring + + + C's docstring + + + 1 + + class_attribute's docstring + + + __init__'s docstring + + + + + None + + + (text * 7 + ' whaddyaknow') + + instance_attribute's docstring + + + f's docstring + + + + # parameter x + + + a * 5 + + # parameter y + + + # parameter args + + + 1 + + f.function_attribute's docstring + +(Comments are not implemented yet.) + +compiler.parse() provides most of what's needed for this doctree, and +"tokenize" can be used to get the rest. We can determine the line number from +the compiler.parse() AST, and the TokenParser.rhs(lineno) method provides the +rest. + +The Docutils Python reader component will transform this module doctree into a +Python-specific Docutils doctree, and then a `stylist transform`_ will +further transform it into a generic doctree. 
Namespaces will have to be +compiled for each of the scopes, but I'm not certain at what stage of +processing. + +It's very important to keep all docstring processing out of this, so that it's +a completely generic and not tool-specific. + +> Why perform all of those transformations? Why not go from the AST to a +> generic doctree? Or, even from the AST to the final output? + +I want the docutils.readers.python.moduleparser.parse_module() function to +produce a standard documentation-oriented tree that can be used by any tool. +We can develop it together without having to compromise on the rest of our +design (i.e., HappyDoc doesn't have to be made to work like Docutils, and +vice-versa). It would be a higher-level version of what compiler.py provides. + +The Python reader component transforms this generic AST into a Python-specific +doctree (it knows about modules, classes, functions, etc.), but this is +specific to Docutils and cannot be used by HappyDoc or others. The stylist +transform does the final layout, converting Python-specific structures +("class" sections, etc.) into a generic doctree using primitives (tables, +sections, lists, etc.). This generic doctree does *not* know about Python +structures any more. The advantage is that this doctree can be handed off to +any of the output writers to create any output format we like. + +The latter two transforms are separate because I want to be able to have +multiple independent layout styles (multiple runtime-selectable "stylist +transforms"). Each of the existing tools (HappyDoc, pydoc, epydoc, Crystal, +etc.) has its own fixed format. I personally don't like the tables-based +format produced by these tools, and I'd like to be able to customize the +format easily. That's the goal of stylist transforms, which are independent +from the Reader component itself. 
One stylist transform could produce +HappyDoc-like output, another could produce output similar to module docs in +the Python library reference manual, and so on. + +It's for exactly this reason: + +>> It's very important to keep all docstring processing out of this, so that +>> it's a completely generic and not tool-specific. + +... but it goes past docstring processing. It's also important to keep style +decisions and tool-specific data transforms out of this module parser. + + +Issues +====== + +* At what point should namespaces be computed? Should they be part of the + basic AST produced by the ASTVisitor walk, or generated by another tree + traversal? + +* At what point should a distinction be made between local variables & + instance attributes in __init__ methods? + +* Docstrings are getting their lineno from their parents. Should the + TokenParser find the real line no's? + +* Comments: include them? How and when? Only full-line comments, or + parameter comments too? (See function "f" above for an example.) + +* Module could use more docstrings & refactoring in places. 
+ +""" + +__docformat__ = 'reStructuredText' + +import sys +import compiler +import compiler.ast +import tokenize +import token +from compiler.consts import OP_ASSIGN +from compiler.visitor import ASTVisitor +from types import StringType, UnicodeType, TupleType + + +def parse_module(module_text, filename): + """Return a module documentation tree from `module_text`.""" + ast = compiler.parse(module_text) + token_parser = TokenParser(module_text) + visitor = ModuleVisitor(filename, token_parser) + compiler.walk(ast, visitor, walker=visitor) + return visitor.module + + +class Node: + + def __init__(self, node): + self.children = [] + """List of child nodes.""" + + self.lineno = node.lineno + """Line number of this node (or ``None``).""" + + def __str__(self, indent=' ', level=0): + return ''.join(['%s%s\n' % (indent * level, repr(self))] + + [child.__str__(indent, level+1) + for child in self.children]) + + def __repr__(self): + parts = [self.__class__.__name__] + for name, value in self.attlist(): + parts.append('%s="%s"' % (name, value)) + return '<%s>' % ' '.join(parts) + + def attlist(self, **atts): + if self.lineno is not None: + atts['lineno'] = self.lineno + attlist = atts.items() + attlist.sort() + return attlist + + def append(self, node): + self.children.append(node) + + def extend(self, node_list): + self.children.extend(node_list) + + +class TextNode(Node): + + def __init__(self, node, text): + Node.__init__(self, node) + self.text = trim_docstring(text) + + def __str__(self, indent=' ', level=0): + prefix = indent * (level + 1) + text = '\n'.join([prefix + line for line in self.text.splitlines()]) + return Node.__str__(self, indent, level) + text + '\n' + + +class Module(Node): + + def __init__(self, node, filename): + Node.__init__(self, node) + self.filename = filename + + def attlist(self): + return Node.attlist(self, filename=self.filename) + + +class Docstring(TextNode): pass + + +class Comment(TextNode): pass + + +class Import(Node): + + def 
__init__(self, node, names, from_name=None): + Node.__init__(self, node) + self.names = names + self.from_name = from_name + + def __str__(self, indent=' ', level=0): + prefix = indent * (level + 1) + lines = [] + for name, as in self.names: + if as: + lines.append('%s%s as %s' % (prefix, name, as)) + else: + lines.append('%s%s' % (prefix, name)) + text = '\n'.join(lines) + return Node.__str__(self, indent, level) + text + '\n' + + def attlist(self): + if self.from_name: + atts = {'from': self.from_name} + else: + atts = {} + return Node.attlist(self, **atts) + + +class Attribute(Node): + + def __init__(self, node, name): + Node.__init__(self, node) + self.name = name + + def attlist(self): + return Node.attlist(self, name=self.name) + + +class AttributeTuple(Node): + + def __init__(self, node, names): + Node.__init__(self, node) + self.names = names + + def attlist(self): + return Node.attlist(self, names=' '.join(self.names)) + + +class Expression(TextNode): + + def __str__(self, indent=' ', level=0): + prefix = indent * (level + 1) + return '%s%s%s\n' % (Node.__str__(self, indent, level), + prefix, self.text.encode('unicode-escape')) + + +class Function(Attribute): pass + + +class ParameterList(Node): pass + + +class Parameter(Attribute): pass + + +class ParameterTuple(AttributeTuple): + + def attlist(self): + return Node.attlist(self, names=normalize_parameter_name(self.names)) + + +class ExcessPositionalArguments(Parameter): pass + + +class ExcessKeywordArguments(Parameter): pass + + +class Default(Expression): pass + + +class Class(Node): + + def __init__(self, node, name, bases=None): + Node.__init__(self, node) + self.name = name + self.bases = bases or [] + + def attlist(self): + atts = {'name': self.name} + if self.bases: + atts['bases'] = ' '.join(self.bases) + return Node.attlist(self, **atts) + + +class Method(Function): pass + + +class BaseVisitor(ASTVisitor): + + def __init__(self, token_parser): + ASTVisitor.__init__(self) + self.token_parser = 
token_parser + self.context = [] + self.documentable = None + + def default(self, node, *args): + self.documentable = None + #print 'in default (%s)' % node.__class__.__name__ + #ASTVisitor.default(self, node, *args) + + def default_visit(self, node, *args): + #print 'in default_visit (%s)' % node.__class__.__name__ + ASTVisitor.default(self, node, *args) + + +class DocstringVisitor(BaseVisitor): + + def visitDiscard(self, node): + if self.documentable: + self.visit(node.expr) + + def visitConst(self, node): + if self.documentable: + if type(node.value) in (StringType, UnicodeType): + self.documentable.append(Docstring(node, node.value)) + else: + self.documentable = None + + def visitStmt(self, node): + self.default_visit(node) + + +class AssignmentVisitor(DocstringVisitor): + + def visitAssign(self, node): + visitor = AttributeVisitor(self.token_parser) + compiler.walk(node, visitor, walker=visitor) + if visitor.attributes: + self.context[-1].extend(visitor.attributes) + if len(visitor.attributes) == 1: + self.documentable = visitor.attributes[0] + else: + self.documentable = None + + +class ModuleVisitor(AssignmentVisitor): + + def __init__(self, filename, token_parser): + AssignmentVisitor.__init__(self, token_parser) + self.filename = filename + self.module = None + + def visitModule(self, node): + self.module = module = Module(node, self.filename) + if node.doc is not None: + module.append(Docstring(node, node.doc)) + self.context.append(module) + self.documentable = module + self.visit(node.node) + self.context.pop() + + def visitImport(self, node): + self.context[-1].append(Import(node, node.names)) + self.documentable = None + + def visitFrom(self, node): + self.context[-1].append( + Import(node, node.names, from_name=node.modname)) + self.documentable = None + + def visitFunction(self, node): + visitor = FunctionVisitor(self.token_parser) + compiler.walk(node, visitor, walker=visitor) + self.context[-1].append(visitor.function) + + def visitClass(self, 
node): + visitor = ClassVisitor(self.token_parser) + compiler.walk(node, visitor, walker=visitor) + self.context[-1].append(visitor.klass) + + +class AttributeVisitor(BaseVisitor): + + def __init__(self, token_parser): + BaseVisitor.__init__(self, token_parser) + self.attributes = [] + + def visitAssign(self, node): + # Don't visit the expression itself, just the attribute nodes: + for child in node.nodes: + self.dispatch(child) + expression_text = self.token_parser.rhs(node.lineno) + expression = Expression(node, expression_text) + for attribute in self.attributes: + attribute.append(expression) + + def visitAssName(self, node): + self.attributes.append(Attribute(node, node.name)) + + def visitAssTuple(self, node): + attributes = self.attributes + self.attributes = [] + self.default_visit(node) + names = [attribute.name for attribute in self.attributes] + att_tuple = AttributeTuple(node, names) + att_tuple.lineno = self.attributes[0].lineno + self.attributes = attributes + self.attributes.append(att_tuple) + + def visitAssAttr(self, node): + self.default_visit(node, node.attrname) + + def visitGetattr(self, node, suffix): + self.default_visit(node, node.attrname + '.' + suffix) + + def visitName(self, node, suffix): + self.attributes.append(Attribute(node, node.name + '.' + suffix)) + + +class FunctionVisitor(DocstringVisitor): + + in_function = 0 + function_class = Function + + def visitFunction(self, node): + if self.in_function: + self.documentable = None + # Don't bother with nested function definitions. 
+ return + self.in_function = 1 + self.function = function = self.function_class(node, node.name) + if node.doc is not None: + function.append(Docstring(node, node.doc)) + self.context.append(function) + self.documentable = function + self.parse_parameter_list(node) + self.visit(node.code) + self.context.pop() + + def parse_parameter_list(self, node): + parameters = [] + special = [] + argnames = list(node.argnames) + if node.kwargs: + special.append(ExcessKeywordArguments(node, argnames[-1])) + argnames.pop() + if node.varargs: + special.append(ExcessPositionalArguments(node, argnames[-1])) + argnames.pop() + defaults = list(node.defaults) + defaults = [None] * (len(argnames) - len(defaults)) + defaults + function_parameters = self.token_parser.function_parameters( + node.lineno) + #print >>sys.stderr, function_parameters + for argname, default in zip(argnames, defaults): + if type(argname) is TupleType: + parameter = ParameterTuple(node, argname) + argname = normalize_parameter_name(argname) + else: + parameter = Parameter(node, argname) + if default: + parameter.append(Default(node, function_parameters[argname])) + parameters.append(parameter) + if parameters or special: + special.reverse() + parameters.extend(special) + parameter_list = ParameterList(node) + parameter_list.extend(parameters) + self.function.append(parameter_list) + + +class ClassVisitor(AssignmentVisitor): + + in_class = 0 + + def __init__(self, token_parser): + AssignmentVisitor.__init__(self, token_parser) + self.bases = [] + + def visitClass(self, node): + if self.in_class: + self.documentable = None + # Don't bother with nested class definitions. 
+ return + self.in_class = 1 + #import mypdb as pdb + #pdb.set_trace() + for base in node.bases: + self.visit(base) + self.klass = klass = Class(node, node.name, self.bases) + if node.doc is not None: + klass.append(Docstring(node, node.doc)) + self.context.append(klass) + self.documentable = klass + self.visit(node.code) + self.context.pop() + + def visitGetattr(self, node, suffix=None): + if suffix: + name = node.attrname + '.' + suffix + else: + name = node.attrname + self.default_visit(node, name) + + def visitName(self, node, suffix=None): + if suffix: + name = node.name + '.' + suffix + else: + name = node.name + self.bases.append(name) + + def visitFunction(self, node): + if node.name == '__init__': + visitor = InitMethodVisitor(self.token_parser) + else: + visitor = MethodVisitor(self.token_parser) + compiler.walk(node, visitor, walker=visitor) + self.context[-1].append(visitor.function) + + +class MethodVisitor(FunctionVisitor): + + function_class = Method + + +class InitMethodVisitor(MethodVisitor, AssignmentVisitor): pass + + +class TokenParser: + + def __init__(self, text): + self.text = text + '\n\n' + self.lines = self.text.splitlines(1) + self.generator = tokenize.generate_tokens(iter(self.lines).next) + self.next() + + def __iter__(self): + return self + + def next(self): + self.token = self.generator.next() + self.type, self.string, self.start, self.end, self.line = self.token + return self.token + + def goto_line(self, lineno): + while self.start[0] < lineno: + self.next() + return token + + def rhs(self, lineno): + """ + Return a whitespace-normalized expression string from the right-hand + side of an assignment at line `lineno`. 
+ """ + self.goto_line(lineno) + while self.string != '=': + self.next() + self.stack = None + while self.type != token.NEWLINE and self.string != ';': + if self.string == '=' and not self.stack: + self.tokens = [] + self.stack = [] + self._type = None + self._string = None + self._backquote = 0 + else: + self.note_token() + self.next() + self.next() + text = ''.join(self.tokens) + return text.strip() + + closers = {')': '(', ']': '[', '}': '{'} + openers = {'(': 1, '[': 1, '{': 1} + del_ws_prefix = {'.': 1, '=': 1, ')': 1, ']': 1, '}': 1, ':': 1, ',': 1} + no_ws_suffix = {'.': 1, '=': 1, '(': 1, '[': 1, '{': 1} + + def note_token(self): + if self.type == tokenize.NL: + return + del_ws = self.del_ws_prefix.has_key(self.string) + append_ws = not self.no_ws_suffix.has_key(self.string) + if self.openers.has_key(self.string): + self.stack.append(self.string) + if (self._type == token.NAME + or self.closers.has_key(self._string)): + del_ws = 1 + elif self.closers.has_key(self.string): + assert self.stack[-1] == self.closers[self.string] + self.stack.pop() + elif self.string == '`': + if self._backquote: + del_ws = 1 + assert self.stack[-1] == '`' + self.stack.pop() + else: + append_ws = 0 + self.stack.append('`') + self._backquote = not self._backquote + if del_ws and self.tokens and self.tokens[-1] == ' ': + del self.tokens[-1] + self.tokens.append(self.string) + self._type = self.type + self._string = self.string + if append_ws: + self.tokens.append(' ') + + def function_parameters(self, lineno): + """ + Return a dictionary mapping parameters to defaults + (whitespace-normalized strings). + """ + self.goto_line(lineno) + while self.string != 'def': + self.next() + while self.string != '(': + self.next() + name = None + default = None + parameter_tuple = None + self.tokens = [] + parameters = {} + self.stack = [self.string] + self.next() + while 1: + if len(self.stack) == 1: + if parameter_tuple: + # Just encountered ")". 
+ #print >>sys.stderr, 'parameter_tuple: %r' % self.tokens + name = ''.join(self.tokens).strip() + self.tokens = [] + parameter_tuple = None + if self.string in (')', ','): + if name: + if self.tokens: + default_text = ''.join(self.tokens).strip() + else: + default_text = None + parameters[name] = default_text + self.tokens = [] + name = None + default = None + if self.string == ')': + break + elif self.type == token.NAME: + if name and default: + self.note_token() + else: + assert name is None, ( + 'token=%r name=%r parameters=%r stack=%r' + % (self.token, name, parameters, self.stack)) + name = self.string + #print >>sys.stderr, 'name=%r' % name + elif self.string == '=': + assert name is not None, 'token=%r' % (self.token,) + assert default is None, 'token=%r' % (self.token,) + assert self.tokens == [], 'token=%r' % (self.token,) + default = 1 + self._type = None + self._string = None + self._backquote = 0 + elif name: + self.note_token() + elif self.string == '(': + parameter_tuple = 1 + self._type = None + self._string = None + self._backquote = 0 + self.note_token() + else: # ignore these tokens: + assert (self.string in ('*', '**', '\n') + or self.type == tokenize.COMMENT), ( + 'token=%r' % (self.token,)) + else: + self.note_token() + self.next() + return parameters + + +def trim_docstring(text): + """ + Trim indentation and blank lines from docstring text & return it. + + See PEP 257. 
+ """ + if not text: + return text + # Convert tabs to spaces (following the normal Python rules) + # and split into a list of lines: + lines = text.expandtabs().splitlines() + # Determine minimum indentation (first line doesn't count): + indent = sys.maxint + for line in lines[1:]: + stripped = line.lstrip() + if stripped: + indent = min(indent, len(line) - len(stripped)) + # Remove indentation (first line is special): + trimmed = [lines[0].strip()] + if indent < sys.maxint: + for line in lines[1:]: + trimmed.append(line[indent:].rstrip()) + # Strip off trailing and leading blank lines: + while trimmed and not trimmed[-1]: + trimmed.pop() + while trimmed and not trimmed[0]: + trimmed.pop(0) + # Return a single string: + return '\n'.join(trimmed) + +def normalize_parameter_name(name): + """ + Converts a tuple like ``('a', ('b', 'c'), 'd')`` into ``'(a, (b, c), d)'`` + """ + if type(name) is TupleType: + return '(%s)' % ', '.join([normalize_parameter_name(n) for n in name]) + else: + return name diff --git a/docutils/transforms/.cvsignore b/docutils/transforms/.cvsignore deleted file mode 100644 index 0d20b6487..000000000 --- a/docutils/transforms/.cvsignore +++ /dev/null @@ -1 +0,0 @@ -*.pyc diff --git a/docutils/transforms/misc.py b/docutils/transforms/misc.py new file mode 100644 index 000000000..cf26f6c3b --- /dev/null +++ b/docutils/transforms/misc.py @@ -0,0 +1,33 @@ +# Author: David Goodger +# Contact: goodger@users.sourceforge.net +# Revision: $Revision$ +# Date: $Date$ +# Copyright: This module has been placed in the public domain. + +""" +Miscellaneous transforms. +""" + +__docformat__ = 'reStructuredText' + +from docutils.transforms import Transform, TransformError + + +class CallBack(Transform): + + """ + Inserts a callback into a document. The callback is called when the + transform is applied, which is determined by its priority. + + For use with `nodes.pending` elements. 
Requires a ``details['callback']`` + entry, a bound method or function which takes one parameter: the pending + node. Other data can be stored in the ``details`` attribute or in the + object hosting the callback method. + """ + + default_priority = 990 + + def apply(self): + pending = self.startnode + pending.details['callback'](pending) + pending.parent.remove(pending) diff --git a/docutils/transforms/peps.py b/docutils/transforms/peps.py index 67277a9cc..9ae7fa80b 100644 --- a/docutils/transforms/peps.py +++ b/docutils/transforms/peps.py @@ -22,7 +22,7 @@ import time from docutils import nodes, utils from docutils import ApplicationError, DataError from docutils.transforms import Transform, TransformError -from docutils.transforms import parts, references +from docutils.transforms import parts, references, misc class Headers(Transform): @@ -42,13 +42,14 @@ class Headers(Transform): def apply(self): if not len(self.document): + # @@@ replace these DataErrors with proper system messages raise DataError('Document tree is empty.') header = self.document[0] if not isinstance(header, nodes.field_list) or \ header.get('class') != 'rfc2822': raise DataError('Document does not begin with an RFC-2822 ' 'header; it is not a PEP.') - pep = title = None + pep = None for field in header: if field[0].astext().lower() == 'pep': # should be the first field value = field[1].astext() @@ -79,6 +80,8 @@ class Headers(Transform): pending = nodes.pending(PEPZero) self.document.insert(1, pending) self.document.note_pending(pending) + if len(header) < 2 or header[1][0].astext().lower() != 'title': + raise DataError('No title!') for field in header: name = field[0].astext().lower() body = field[1] @@ -150,7 +153,8 @@ class TargetNotes(Transform): """ Locate the "References" section, insert a placeholder for an external - target footnote insertion transform at the end, and run the transform. + target footnote insertion transform at the end, and schedule the + transform to run immediately. 
""" default_priority = 520 @@ -180,6 +184,19 @@ class TargetNotes(Transform): pending = nodes.pending(references.TargetNotes) refsect.append(pending) self.document.note_pending(pending, 0) + pending = nodes.pending(misc.CallBack, + details={'callback': self.cleanup_callback}) + refsect.append(pending) + self.document.note_pending(pending, 1) + + def cleanup_callback(self, pending): + """ + Remove an empty "References" section. + + Called after the `references.TargetNotes` transform is complete. + """ + if len(pending.parent) == 2: # and <pending> + pending.parent.parent.remove(pending.parent) class PEPZero(Transform): diff --git a/docutils/utils.py b/docutils/utils.py index a216e114a..809511cba 100644 --- a/docutils/utils.py +++ b/docutils/utils.py @@ -373,6 +373,15 @@ def normalize_name(name): return ' '.join(name.lower().split()) def new_document(source, settings=None): + """ + Return a new empty document object. + + :Parameters: + `source` : string + The path to or description of the source text of the document. + `settings` : optparse.Values object + Runtime settings. If none provided, a default set will be used. 
+ """ if settings is None: settings = frontend.OptionParser().get_default_values() reporter = Reporter(source, settings.report_level, settings.halt_level, diff --git a/docutils/writers/.cvsignore b/docutils/writers/.cvsignore deleted file mode 100644 index 0d20b6487..000000000 --- a/docutils/writers/.cvsignore +++ /dev/null @@ -1 +0,0 @@ -*.pyc diff --git a/docutils/writers/docutils_xml.py b/docutils/writers/docutils_xml.py index e5de9c48c..4ecbfb6c2 100644 --- a/docutils/writers/docutils_xml.py +++ b/docutils/writers/docutils_xml.py @@ -22,12 +22,18 @@ class Writer(writers.Writer): settings_spec = ( '"Docutils XML" Writer Options', - 'Warning: these options may adversely affect whitespace; use them ' - 'only for reading convenience.', + 'Warning: the --newlines and --indents options may adversely affect ' + 'whitespace; use them only for reading convenience.', (('Generate XML with newlines before and after tags.', ['--newlines'], {'action': 'store_true'}), ('Generate XML with indents and newlines.', - ['--indents'], {'action': 'store_true'}),),) + ['--indents'], {'action': 'store_true'}), + ('Omit the XML declaration. 
Use with caution.', + ['--no-xml-declaration'], {'dest': 'xml_declaration', 'default': 1, + 'action': 'store_false'}), + ('Omit the DOCTYPE declaration.', + ['--no-doctype'], {'dest': 'doctype_declaration', 'default': 1, + 'action': 'store_false'}),)) output = None """Final translated form of `document`.""" @@ -48,9 +54,13 @@ class Writer(writers.Writer): if settings.indents: newline = '\n' indent = ' ' - output_prefix = [self.xml_declaration % settings.output_encoding, - self.doctype, - self.generator % docutils.__version__] + output_prefix = [] + if settings.xml_declaration: + output_prefix.append( + self.xml_declaration % settings.output_encoding) + if settings.doctype_declaration: + output_prefix.append(self.doctype) + output_prefix.append(self.generator % docutils.__version__) docnode = self.document.asdom().childNodes[0] self.output = (''.join(output_prefix) + docnode.toprettyxml(indent, newline)) diff --git a/docutils/writers/html4css1.py b/docutils/writers/html4css1.py index 057fb5024..855d78895 100644 --- a/docutils/writers/html4css1.py +++ b/docutils/writers/html4css1.py @@ -125,10 +125,10 @@ class HTMLTranslator(nodes.NodeVisitor): paragraph is the only child of its parent (footnotes & citations are allowed a label first). - - Regardless of the above, in definitions, table cells, field - bodies, option descriptions, and list items, mark the first - child with 'class="first"' if it is a paragraph. The stylesheet - sets the top margin to 0 for these paragraphs. + - Regardless of the above, in definitions, table cells, field bodies, + option descriptions, and list items, mark the first child with + 'class="first"' and the last child with 'class="last"'. The stylesheet + sets the margins (top & bottom respectively) to 0 for these elements. The ``no_compact_lists`` setting (``--no-compact-lists`` command-line option) disables list whitespace optimization. 
@@ -780,7 +780,7 @@ class HTMLTranslator(nodes.NodeVisitor): self.body.append('\n</pre>\n') def visit_meta(self, node): - self.head.append(self.starttag(node, 'meta', **node.attributes)) + self.head.append(self.emptytag(node, 'meta', **node.attributes)) def depart_meta(self, node): pass