# Author: David Goodger
# Contact: goodger@users.sourceforge.net
# Revision: $Revision$
# Date: $Date$
# Copyright: This module has been placed in the public domain.
"""
Simple HyperText Markup Language document tree Writer.
The output conforms to the HTML 4.01 Transitional DTD and to the Extensible
HTML version 1.0 Transitional DTD (*almost* strict). The output contains a
minimum of formatting information. A cascading style sheet ("default.css" by
default) is required for proper viewing with a modern graphical browser.
"""
__docformat__ = 'reStructuredText'
import sys
import os
import time
import re
from types import ListType
import docutils
from docutils import nodes, utils, writers, languages
class Writer(writers.Writer):
supported = ('html', 'html4css1', 'xhtml')
"""Formats this writer supports."""
settings_spec = (
'HTML-Specific Options',
None,
(('Specify a stylesheet URL, used verbatim. Default is '
'"default.css".',
['--stylesheet'],
{'default': 'default.css', 'metavar': ''}),
('Specify a stylesheet file, relative to the current working '
'directory. The path is adjusted relative to the output HTML '
'file. Overrides --stylesheet.',
['--stylesheet-path'],
{'metavar': ''}),
('Link to the stylesheet in the output HTML file. This is the '
'default.',
['--link-stylesheet'],
{'dest': 'embed_stylesheet', 'action': 'store_false'}),
('Embed the stylesheet in the output HTML file. The stylesheet '
'file must be accessible during processing (--stylesheet-path is '
'recommended). The stylesheet is embedded inside a comment, so it '
'must not contain the text "--" (two hyphens). Default: link the '
'stylesheet, do not embed it.',
['--embed-stylesheet'],
{'action': 'store_true'}),
('Format for footnote references: one of "superscript" or '
'"brackets". Default is "superscript".',
['--footnote-references'],
{'choices': ['superscript', 'brackets'], 'default': 'superscript',
'metavar': ''}),
('Remove extra vertical whitespace between items of bullet lists '
'and enumerated lists, when list items are "simple" (i.e., all '
'items each contain one paragraph and/or one "simple" sublist '
'only). Default: enabled.',
['--compact-lists'],
{'default': 1, 'action': 'store_true'}),
('Disable compact simple bullet and enumerated lists.',
['--no-compact-lists'],
{'dest': 'compact_lists', 'action': 'store_false'}),))
relative_path_settings = ('stylesheet_path',)
output = None
"""Final translated form of `document`."""
def __init__(self):
writers.Writer.__init__(self)
self.translator_class = HTMLTranslator
def translate(self):
visitor = self.translator_class(self.document)
self.document.walkabout(visitor)
self.output = visitor.astext()
self.head_prefix = visitor.head_prefix
self.stylesheet = visitor.stylesheet
self.head = visitor.head
self.body_prefix = visitor.body_prefix
self.body_pre_docinfo = visitor.body_pre_docinfo
self.docinfo = visitor.docinfo
self.body = visitor.body
self.body_suffix = visitor.body_suffix
class HTMLTranslator(nodes.NodeVisitor):
"""
This HTML writer has been optimized to produce visually compact
lists (less vertical whitespace). HTML's mixed content models
allow list items to contain "
body elements
" or
"
just text
" or even "
text
and body
elements
combined
", each with different effects. It would
be best to stick with strict body elements in list items, but they
affect vertical spacing in browsers (although they really
shouldn't).
Here is an outline of the optimization:
- Check for and omit
tags in "simple" lists: list items
contain either a single paragraph, a nested simple list, or a
paragraph followed by a nested simple list. This means that
this list can be compact:
- Item 1.
- Item 2.
But this list cannot be compact:
- Item 1.
This second paragraph forces space between list items.
- Item 2.
- In non-list contexts, omit
tags on a paragraph if that
paragraph is the only child of its parent (footnotes & citations
are allowed a label first).
- Regardless of the above, in definitions, table cells, field
bodies, option descriptions, and list items, mark the first
child with 'class="first"' if it is a paragraph. The stylesheet
sets the top margin to 0 for these paragraphs.
The ``no_compact_lists`` setting (``--no-compact-lists`` command-line
option) disables list whitespace optimization.
"""
xml_declaration = '\n'
doctype = ('\n')
html_head = ('\n
\n')
content_type = ('\n')
generator = ('\n')
stylesheet_link = '\n'
embedded_stylesheet = '\n'
named_tags = {'a': 1, 'applet': 1, 'form': 1, 'frame': 1, 'iframe': 1,
'img': 1, 'map': 1}
words_and_spaces = re.compile(r'\S+| +|\n')
def __init__(self, document):
nodes.NodeVisitor.__init__(self, document)
self.settings = settings = document.settings
lcode = settings.language_code
self.language = languages.get_language(lcode)
self.head_prefix = [
self.xml_declaration % settings.output_encoding,
self.doctype,
self.html_head % (lcode, lcode),
self.content_type % settings.output_encoding,
self.generator % docutils.__version__]
self.head = []
if settings.embed_stylesheet:
stylesheet = self.get_stylesheet_reference(os.getcwd())
stylesheet_text = open(stylesheet).read()
self.stylesheet = [self.embedded_stylesheet % stylesheet_text]
else:
stylesheet = self.get_stylesheet_reference()
if stylesheet:
self.stylesheet = [self.stylesheet_link % stylesheet]
else:
self.stylesheet = []
self.body_prefix = ['\n\n']
self.body_pre_docinfo = []
self.docinfo = []
self.body = []
self.body_suffix = ['\n\n']
self.section_level = 0
self.context = []
self.topic_class = ''
self.colspecs = []
self.compact_p = 1
self.compact_simple = None
self.in_docinfo = None
def get_stylesheet_reference(self, relative_to=None):
settings = self.settings
if settings.stylesheet_path:
if relative_to == None:
relative_to = settings._destination
return utils.relative_path(relative_to, settings.stylesheet_path)
else:
return settings.stylesheet
def astext(self):
return ''.join(self.head_prefix + self.head + self.stylesheet
+ self.body_prefix + self.body_pre_docinfo
+ self.docinfo + self.body + self.body_suffix)
def encode(self, text):
"""Encode special characters in `text` & return."""
# @@@ A codec to do these and all other HTML entities would be nice.
text = text.replace("&", "&")
text = text.replace("<", "<")
text = text.replace('"', """)
text = text.replace(">", ">")
text = text.replace("@", "@") # may thwart some address harvesters
return text
def attval(self, text,
whitespace=re.compile('[\n\r\t\v\f]')):
"""Cleanse, HTML encode, and return attribute value text."""
return self.encode(whitespace.sub(' ', text))
def starttag(self, node, tagname, suffix='\n', infix='', **attributes):
"""
Construct and return a start tag given a node (id & class attributes
are extracted), tag name, and optional attributes.
"""
tagname = tagname.lower()
atts = {}
for (name, value) in attributes.items():
atts[name.lower()] = value
for att in ('class',): # append to node attribute
if node.has_key(att) or atts.has_key(att):
atts[att] = \
(node.get(att, '') + ' ' + atts.get(att, '')).strip()
for att in ('id',): # node attribute overrides
if node.has_key(att):
atts[att] = node[att]
if atts.has_key('id') and self.named_tags.has_key(tagname):
atts['name'] = atts['id'] # for compatibility with old browsers
attlist = atts.items()
attlist.sort()
parts = [tagname]
for name, value in attlist:
if value is None: # boolean attribute
# According to the HTML spec, ```` is good,
# ```` is bad.
# (But the XHTML (XML) spec says the opposite. )
parts.append(name.lower())
elif isinstance(value, ListType):
values = [str(v) for v in value]
parts.append('%s="%s"' % (name.lower(),
self.attval(' '.join(values))))
else:
parts.append('%s="%s"' % (name.lower(),
self.attval(str(value))))
return '<%s%s>%s' % (' '.join(parts), infix, suffix)
def emptytag(self, node, tagname, suffix='\n', **attributes):
"""Construct and return an XML-compatible empty tag."""
return self.starttag(node, tagname, suffix, infix=' /', **attributes)
def visit_Text(self, node):
self.body.append(self.encode(node.astext()))
def depart_Text(self, node):
pass
def visit_address(self, node):
self.visit_docinfo_item(node, 'address', meta=None)
self.body.append(self.starttag(node, 'pre', CLASS='address'))
def depart_address(self, node):
self.body.append('\n\n')
self.depart_docinfo_item()
def visit_admonition(self, node, name):
self.body.append(self.starttag(node, 'div', CLASS=name))
self.body.append('
'
+ self.language.labels[name] + '
\n')
def depart_admonition(self):
self.body.append('\n')
def visit_attention(self, node):
self.visit_admonition(node, 'attention')
def depart_attention(self, node):
self.depart_admonition()
def visit_author(self, node):
self.visit_docinfo_item(node, 'author')
def depart_author(self, node):
self.depart_docinfo_item()
def visit_authors(self, node):
pass
def depart_authors(self, node):
pass
def visit_block_quote(self, node):
self.body.append(self.starttag(node, 'blockquote'))
def depart_block_quote(self, node):
self.body.append('\n')
def check_simple_list(self, node):
"""Check for a simple list that can be rendered compactly."""
visitor = SimpleListChecker(self.document)
try:
node.walk(visitor)
except nodes.NodeFound:
return None
else:
return 1
def visit_bullet_list(self, node):
atts = {}
old_compact_simple = self.compact_simple
self.context.append((self.compact_simple, self.compact_p))
self.compact_p = None
self.compact_simple = (self.settings.compact_lists and
(self.compact_simple
or self.topic_class == 'contents'
or self.check_simple_list(node)))
if self.compact_simple and not old_compact_simple:
atts['class'] = 'simple'
self.body.append(self.starttag(node, 'ul', **atts))
def depart_bullet_list(self, node):
self.compact_simple, self.compact_p = self.context.pop()
self.body.append('\n')
def visit_caption(self, node):
self.body.append(self.starttag(node, 'p', '', CLASS='caption'))
def depart_caption(self, node):
self.body.append('
tags if this is an only child and optimizable.
if (self.compact_simple or
self.compact_p and (len(node.parent) == 1 or
len(node.parent) == 2 and
isinstance(node.parent[0], nodes.label))):
self.context.append('')
else:
self.body.append(self.starttag(node, 'p', ''))
self.context.append('