python-peps/docutils/parsers/rst/__init__.py

121 lines
4.1 KiB
Python

# Author: David Goodger
# Contact: goodger@users.sourceforge.net
# Revision: $Revision$
# Date: $Date$
# Copyright: This module has been placed in the public domain.
"""
This is the ``docutils.parsers.rst`` package. It exports a single class,
`Parser`, the reStructuredText parser.
Usage
=====
1. Create a parser::

       parser = docutils.parsers.rst.Parser()

   Several optional arguments may be passed to modify the parser's behavior.
   Please see `Customizing the Parser`_ below for details.

2. Gather input (a multi-line string), by reading a file or the standard
   input::

       input = sys.stdin.read()

3. Create a new empty `docutils.nodes.document` tree::

       document = docutils.utils.new_document(source, settings)

   See `docutils.utils.new_document()` for parameter details.

4. Run the parser, populating the document tree::

       parser.parse(input, document)

Parser Overview
===============
The reStructuredText parser is implemented as a state machine, examining its
input one line at a time. To understand how the parser works, please first
become familiar with the `docutils.statemachine` module, then see the
`states` module.
Customizing the Parser
----------------------
Anything that isn't already customizable is that way simply because that type
of customizability hasn't been implemented yet. Patches welcome!
When instantiating an object of the `Parser` class, two parameters may be
passed: ``rfc2822`` and ``inliner``. Pass ``rfc2822=1`` to enable an initial
RFC-2822 style header block, parsed as a "field_list" element (with "class"
attribute set to "rfc2822"). Currently this is the only body-level element
which is customizable without subclassing. (Tip: subclass `Parser` and change
its "state_classes" and "initial_state" attributes to refer to new classes.
Contact the author if you need more details.)
The ``inliner`` parameter takes an instance of `states.Inliner` or a subclass.
It handles inline markup recognition. A common extension is the addition of
further implicit hyperlinks, like "RFC 2822". This can be done by subclassing
`states.Inliner`, adding a new method for the implicit markup, and adding a
``(pattern, method)`` pair to the "implicit_dispatch" attribute of the
subclass. See `states.Inliner.implicit_inline()` for details. Explicit
inline markup can be customized in a `states.Inliner` subclass via the
``patterns.initial`` and ``dispatch`` attributes (and new methods as
appropriate).
"""
__docformat__ = 'reStructuredText'
import docutils.parsers
import docutils.statemachine
from docutils.parsers.rst import states
class Parser(docutils.parsers.Parser):

    """
    The reStructuredText parser.

    Feeds an input string, split into lines, through a
    `states.RSTStateMachine` in order to populate a document tree.
    """

    # Aliases this parser supports.
    supported = ('restructuredtext', 'rst', 'rest', 'restx', 'rtxt', 'rstx')

    # Command-line option specification: a section title, a second slot left
    # as None, and a tuple of (help text, option strings, keyword arguments)
    # entries, one per option.
    settings_spec = (
        'reStructuredText Parser Options',
        None,
        (
            (
                'Recognize and link to PEP references (like "PEP 258").',
                ['--pep-references'],
                {'action': 'store_true'},
            ),
            (
                'Recognize and link to RFC references (like "RFC 822").',
                ['--rfc-references'],
                {'action': 'store_true'},
            ),
            (
                'Set number of spaces for tab expansion (default 8).',
                ['--tab-width'],
                {'metavar': '<width>', 'type': 'int', 'default': 8},
            ),
        ),
    )

    def __init__(self, rfc2822=None, inliner=None):
        """
        Configure the parser.

        Parameters:

        - `rfc2822`: if true, parsing starts in the 'RFC2822Body' state
          instead of the default 'Body' state.
        - `inliner`: object handed to the state machine at run time as its
          ``inliner`` argument; stored unchanged (may be None).
        """
        self.initial_state = 'RFC2822Body' if rfc2822 else 'Body'
        self.state_classes = states.state_classes
        self.inliner = inliner

    def parse(self, inputstring, document):
        """
        Parse `inputstring` and populate `document`, a document tree.

        The state machine built for the run is kept on ``self.statemachine``.
        """
        self.setup_parse(inputstring, document)
        debug_flag = document.reporter[''].debug
        machine = states.RSTStateMachine(
            state_classes=self.state_classes,
            initial_state=self.initial_state,
            debug=debug_flag)
        self.statemachine = machine
        # Split the input into lines, expanding tabs according to the
        # configured tab width.
        lines = docutils.statemachine.string2lines(
            inputstring,
            tab_width=document.settings.tab_width,
            convert_whitespace=1)
        machine.run(lines, document, inliner=self.inliner)
        self.finish_parse()