2004-04-18 23:50:43 -04:00
< ? php
2004-09-04 16:48:57 -04:00
#
# Markdown - A text-to-HTML conversion tool for web writers
#
# Copyright (c) 2004 John Gruber
# <http://daringfireball.net/projects/markdown/>
#
2005-01-24 01:30:16 -05:00
# Copyright (c) 2004 Michel Fortin - PHP Port
2004-09-04 16:48:57 -04:00
# <http://www.michelf.com/projects/php-markdown/>
#
# Every variable configured in this file is declared global up front, so the
# assignments below reach the global symbol table even when this file is
# included from inside a function.
global $MarkdownPHPVersion, $MarkdownSyntaxVersion;
global $md_empty_element_suffix, $md_tab_width;
global $md_nested_brackets_depth, $md_nested_brackets;
global $md_escape_table, $md_backslash_escape_table;
global $md_list_level;

# Version of this PHP port, and of the Markdown syntax it implements.
$MarkdownPHPVersion    = '1.0.1'; # Fri 17 Dec 2004
$MarkdownSyntaxVersion = '1.0.1'; # Sun 12 Dec 2004

#
# Global default settings:
#
$md_empty_element_suffix = " />"; # Change to ">" for HTML output
$md_tab_width            = 4;
# -- WordPress Plugin Interface -----------------------------------------------
2004-04-18 23:50:43 -04:00
/*
Plugin Name: Markdown
Plugin URI: http://www.michelf.com/projects/php-markdown/
Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>
Version: 1.0.1
Author: Michel Fortin
Author URI: http://www.michelf.com/
*/
2004-09-04 16:48:57 -04:00
# Only runs when $wp_version is set, i.e. when loaded as a WordPress plugin.
if (isset($wp_version)) {
    # Remove default WordPress auto-paragraph filter.
    foreach (array('the_content', 'the_excerpt', 'comment_text') as $md_wp_hook) {
        remove_filter($md_wp_hook, 'wpautop');
    }

    # Add Markdown filter with priority 6 (same as Textile).
    foreach (array('the_content', 'the_excerpt', 'the_excerpt_rss', 'comment_text') as $md_wp_hook) {
        add_filter($md_wp_hook, 'Markdown', 6);
    }

    # Do not leave the loop variable behind in the including scope.
    unset($md_wp_hook);
}
2004-04-18 23:50:43 -04:00
2005-01-24 01:30:16 -05:00
# -- bBlog Plugin Info --------------------------------------------------------
function identify_modifier_markdown() {
#
# Returns the plugin description array for bBlog: name, type, nice name,
# description, authors, licence, version and help text. The version entry
# is read from the $MarkdownPHPVersion global.
#
    global $MarkdownPHPVersion;

    $info = array();
    $info['name']        = 'markdown';
    $info['type']        = 'modifier';
    $info['nicename']    = 'Markdown';
    $info['description'] = 'A text-to-HTML conversion tool for web writers';
    $info['authors']     = 'Michel Fortin and John Gruber';
    $info['licence']     = 'GPL';
    $info['version']     = $MarkdownPHPVersion;
    $info['help']        = '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>';

    return $info;
}
# -- Smarty Modifier Interface ------------------------------------------------
2004-09-04 16:48:57 -04:00
function smarty_modifier_markdown($text) {
#
# Smarty modifier entry point: passes $text through Markdown() and
# returns the result unchanged.
#
    $html = Markdown($text);
    return $html;
}
2005-01-24 01:30:16 -05:00
# -- Textile Compatibility Mode -----------------------------------------------
# Rename this file to "classTextile.php" and it can replace Textile anywhere.
# Active only when this file's path ends in "classTextile.php"
# (case-insensitive comparison of the last 16 characters).
if (strcasecmp(substr(__FILE__, -16), "classTextile.php") === 0) {
    # Try to include PHP SmartyPants. Should be in the same directory.
    # Failure is deliberately silenced: SmartyPants is optional.
    @include_once 'smartypants.php';

    # Fake Textile class. It calls Markdown instead.
    class Textile {
        # Stand-in for Textile's TextileThis(). Markdown is applied only when
        # both $lite and $encode are empty; SmartyPants is applied whenever
        # that function exists. $noimage and $strict are accepted but unused.
        function TextileThis($text, $lite='', $encode='', $noimage='', $strict='') {
            $use_markdown = ($lite == '' && $encode == '');
            if ($use_markdown) {
                $text = Markdown($text);
            }
            if (function_exists('SmartyPants')) {
                $text = SmartyPants($text);
            }
            return $text;
        }
    }
}
#
# Globals:
#
# Regex to match balanced [brackets].
# Needed to insert a maximum bracket depth while converting to PHP.
2004-09-04 16:48:57 -04:00
# Pattern fragment matching text with balanced [brackets], nested at most
# $md_nested_brackets_depth levels deep.
$md_nested_brackets_depth = 6;
$md_nested_brackets =
    str_repeat('(?>[^\[\]]+|\[', $md_nested_brackets_depth) .
    str_repeat('\])*', $md_nested_brackets_depth);

# Table of hash values for escaped characters:
# each special character maps to the MD5 of itself.
$md_escape_table = array(
    "\\" => md5("\\"),
    "`"  => md5("`"),
    "*"  => md5("*"),
    "_"  => md5("_"),
    "{"  => md5("{"),
    "}"  => md5("}"),
    "["  => md5("["),
    "]"  => md5("]"),
    "("  => md5("("),
    ")"  => md5(")"),
    ">"  => md5(">"),
    "#"  => md5("#"),
    "+"  => md5("+"),
    "-"  => md5("-"),
    "."  => md5("."),
    "!"  => md5("!")
);

# Create an identical table but for escaped characters, i.e. keyed by a
# backslash followed by the character. The table is initialised explicitly:
# a bare `$md_backslash_escape_table;` statement assigns nothing and only
# raises an undefined-variable diagnostic.
$md_backslash_escape_table = array();
foreach ($md_escape_table as $key => $char)
    $md_backslash_escape_table["\\$key"] = $char;
2004-04-18 23:50:43 -04:00
function Markdown($text) {
#
# Main function. The order in which other subs are called here is
# essential. Link and image substitutions need to happen before
# _EscapeSpecialChars(), so that any *'s or _'s in the <a>
# and <img> tags get encoded.
#
# Returns the converted text with one trailing newline appended.
#
    global $md_urls, $md_titles, $md_html_blocks;

    # Clear the global hashes. If we don't clear these, you get conflicts
    # from other articles when generating a page which contains more than
    # one article (e.g. an index page that shows the N most recent
    # articles):
    $md_urls        = array();
    $md_titles      = array();
    $md_html_blocks = array();

    # Standardize line endings (DOS to Unix and Mac to Unix), and make
    # sure $text ends with a couple of newlines:
    $text = str_replace(array("\r\n", "\r"), "\n", $text) . "\n\n";

    # Convert all tabs to spaces.
    $text = _Detab($text);

    # Strip any lines consisting only of spaces and tabs.
    # This makes subsequent regexen easier to write, because we can
    # match consecutive blank lines with /\n+/ instead of something
    # contorted like /[ \t]*\n+/ .
    $text = preg_replace('/^[ \t]+$/m', '', $text);

    # Remaining passes, in order: hash block-level HTML, strip link
    # definitions into the global hashes, run the block-level
    # transformations, then undo the special-character escaping.
    $passes = array(
        '_HashHTMLBlocks',
        '_StripLinkDefinitions',
        '_RunBlockGamut',
        '_UnescapeSpecialChars',
    );
    foreach ($passes as $pass) {
        $text = $pass($text);
    }

    return $text . "\n";
}
2004-09-04 16:48:57 -04:00
2004-04-18 23:50:43 -04:00
function _StripLinkDefinitions($text) {
#
# Strips link definitions from text, stores the URLs and titles in
# hash references.
#
# Every definition found is handed to _StripLinkDefinitions_callback();
# whatever the callback returns takes the definition's place in the text.
#
    global $md_tab_width;
    $max_indent = $md_tab_width - 1;

    # Link defs are in the form: ^[id]: url "optional title"
    $link_def = '{
        ^[ ]{0,' . $max_indent . '}\[(.+)\]:   # id = $1
          [ \t]*
          \n?               # maybe *one* newline
          [ \t]*
        <?(\S+?)>?          # url = $2
          [ \t]*
          \n?               # maybe one newline
          [ \t]*
        (?:
            (?<=\s)         # lookbehind for whitespace
            ["(]
            (.+?)           # title = $3
            [")]
            [ \t]*
        )?  # title is optional
        (?:\n+|\Z)
        }xm';

    return preg_replace_callback($link_def, '_StripLinkDefinitions_callback', $text);
}
function _StripLinkDefinitions_callback($matches) {
#
# preg_replace_callback() handler for _StripLinkDefinitions().
#
# Params:
#    $matches - [1] link id, [2] url, [3] optional title
#
# Stores the url (after _EncodeAmpsAndAngles) in $md_urls and the title,
# when present, in $md_titles, both keyed by the lower-cased id. Returns
# an empty string so the definition disappears from the text.
#
    global $md_urls, $md_titles;

    $link_id = strtolower($matches[1]);
    $md_urls[$link_id] = _EncodeAmpsAndAngles($matches[2]);

    if (isset($matches[3])) {
        # The title ends up inside a double-quoted HTML attribute, so
        # literal quotes must become entities. (Replacing '"' with '"'
        # did nothing.)
        $md_titles[$link_id] = str_replace('"', '&quot;', $matches[3]);
    }

    return ''; # String that will replace the block
}
2004-09-04 16:48:57 -04:00
2004-04-18 23:50:43 -04:00
function _HashHTMLBlocks($text) {
#
# Hashify HTML blocks: each block matched by one of the four patterns
# below is passed to _HashHTMLBlocks_callback() and replaced by whatever
# that callback returns.
#
# We only want to do this for block-level HTML tags, such as headers,
# lists, and tables. That's because we still want to wrap <p>s around
# "paragraphs" that are wrapped in non-block-level tags, such as anchors,
# phrase emphasis, and spans. The list of tags we're looking for is
# hard-coded:
#
    global $md_tab_width;
    $less_than_tab = $md_tab_width - 1;

    $block_tags_a = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|' .
                    'script|noscript|form|fieldset|iframe|math|ins|del';
    $block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|' .
                    'script|noscript|form|fieldset|iframe|math';

    # First, look for nested blocks, e.g.:
    #   <div>
    #       <div>
    #       tags for inner block must be indented.
    #       </div>
    #   </div>
    #
    # The outermost tags must start at the left margin for this to match, and
    # the inner nested divs must be indented.
    # We need to do this before the next, more liberal match, because the next
    # match will start at the first `<div>` and stop at the first `</div>`.
    $nested_blocks = "{
                (                       # save in $1
                    ^                   # start of line  (with /m)
                    <($block_tags_a)    # start tag = $2
                    \\b                 # word break
                    (.*\\n)*?           # any number of lines, minimally matching
                    </\\2>              # the matching end tag
                    [ \\t]*             # trailing spaces/tabs
                    (?=\\n+|\\Z)    # followed by a newline or end of document
                )
        }xm";

    #
    # Now match more liberally, simply from `\n<tag>` to `</tag>\n`
    #
    $liberal_blocks = "{
                (                       # save in $1
                    ^                   # start of line  (with /m)
                    <($block_tags_b)    # start tag = $2
                    \\b                 # word break
                    (.*\\n)*?           # any number of lines, minimally matching
                    .*</\\2>            # the matching end tag
                    [ \\t]*             # trailing spaces/tabs
                    (?=\\n+|\\Z)    # followed by a newline or end of document
                )
        }xm";

    # Special case just for <hr />. It was easier to make a special case than
    # to make the other regex more complicated.
    $hr_tags = '{
                (?:
                    (?<=\n\n)       # Starting after a blank line
                    |               # or
                    \A\n?           # the beginning of the doc
                )
                (                       # save in $1
                    [ ]{0,' . $less_than_tab . '}
                    <(hr)               # start tag = $2
                    \b                  # word break
                    ([^<>])*?           #
                    /?>                 # the matching end tag
                    [ \t]*
                    (?=\n{2,}|\Z)       # followed by a blank line or end of document
                )
        }x';

    # Special case for standalone HTML comments:
    $html_comments = '{
                (?:
                    (?<=\n\n)       # Starting after a blank line
                    |               # or
                    \A\n?           # the beginning of the doc
                )
                (                       # save in $1
                    [ ]{0,' . $less_than_tab . '}
                    (?s:
                        <!
                        (--.*?--\s*)+
                        >
                    )
                    [ \t]*
                    (?=\n{2,}|\Z)       # followed by a blank line or end of document
                )
        }x';

    # Order matters: strict nested match first, then the liberal one.
    $passes = array($nested_blocks, $liberal_blocks, $hr_tags, $html_comments);
    foreach ($passes as $pattern) {
        $text = preg_replace_callback($pattern, '_HashHTMLBlocks_callback', $text);
    }

    return $text;
}
function _HashHTMLBlocks_callback($matches) {
#
# preg_replace_callback() handler for _HashHTMLBlocks().
#
# Stores the matched block ($matches[1]) in $md_html_blocks under its MD5
# digest, and returns that digest surrounded by blank lines as the
# string that will replace the block.
#
    global $md_html_blocks;

    $block  = $matches[1];
    $digest = md5($block);
    $md_html_blocks[$digest] = $block;

    return "\n\n" . $digest . "\n\n";
}
2004-09-04 16:48:57 -04:00
2004-04-18 23:50:43 -04:00
function _RunBlockGamut($text) {
#
# These are all the transformations that form block-level
# tags like paragraphs, headers, and list items.
#
    global $md_empty_element_suffix;

    $text = _DoHeaders($text);

    # Do Horizontal Rules: three or more *, - or _ alone on a line.
    $rule_patterns = array(
        '{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}mx',
        '{^[ ]{0,2}([ ]? -[ ]?){3,}[ \t]*$}mx',
        '{^[ ]{0,2}([ ]? _[ ]?){3,}[ \t]*$}mx',
    );
    $rule_html = "\n<hr" . $md_empty_element_suffix . "\n";
    $text = preg_replace($rule_patterns, $rule_html, $text);

    $text = _DoLists($text);
    $text = _DoCodeBlocks($text);
    $text = _DoBlockQuotes($text);

    # We already ran _HashHTMLBlocks() before, in Markdown(), but that
    # was to escape raw HTML in the original Markdown source. This time,
    # we're escaping the markup we've just created, so that we don't wrap
    # <p> tags around block-level tags.
    $text = _HashHTMLBlocks($text);

    return _FormParagraphs($text);
}
function _RunSpanGamut($text) {
#
# These are all the transformations that occur *within* block-level
# tags like paragraphs, headers, and list items.
#
    global $md_empty_element_suffix;

    # The passes run strictly in this order:
    #  - code spans, then special-character escaping;
    #  - images before anchors, because ![foo][f] looks like an anchor;
    #  - auto-links (things like `<http://example.com/>`) after anchors,
    #    because you can use < and > delimiters in inline links like
    #    [this](<url>);
    #  - fix unencoded ampersands and <'s;
    #  - italics and bold.
    $passes = array(
        '_DoCodeSpans',
        '_EscapeSpecialChars',
        '_DoImages',
        '_DoAnchors',
        '_DoAutoLinks',
        '_EncodeAmpsAndAngles',
        '_DoItalicsAndBold',
    );
    foreach ($passes as $pass) {
        $text = $pass($text);
    }

    # Do hard breaks: two or more spaces at the end of a line.
    return preg_replace('/ {2,}\n/', "<br$md_empty_element_suffix\n", $text);
}
function _EscapeSpecialChars($text) {
#
# Splits $text with _TokenizeHTML() and rebuilds it token by token:
# 'tag' tokens get their * and _ hashed, every other token goes through
# _EncodeBackslashEscapes().
#
    global $md_escape_table;

    $pieces = array();
    foreach (_TokenizeHTML($text) as $token) {
        $content = $token[1];
        if ($token[0] == 'tag') {
            # Within tags, encode * and _ so they don't conflict
            # with their use in Markdown for italics and strong.
            # We're replacing each such character with its
            # corresponding MD5 checksum value; this is likely
            # overkill, but it should prevent us from colliding
            # with the escape values by accident.
            $pieces[] = str_replace(
                array('*', '_'),
                array($md_escape_table['*'], $md_escape_table['_']),
                $content);
        }
        else {
            $pieces[] = _EncodeBackslashEscapes($content);
        }
    }

    # Rebuild the text from the processed tokens.
    return implode('', $pieces);
}
function _DoAnchors($text) {
#
# Turn Markdown link shortcuts into XHTML <a> tags.
#
# Reference-style links are handled first, then inline-style links; each
# match is rewritten by the corresponding *_callback function.
#
    global $md_nested_brackets;

    #
    # First, handle reference-style links: [link text] [id]
    #
    $reference_link = "{
        (                   # wrap whole match in $1
          \\[
            ($md_nested_brackets)   # link text = $2
          \\]

          [ ]?              # one optional space
          (?:\\n[ ]*)?      # one optional newline followed by spaces

          \\[
            (.*?)       # id = $3
          \\]
        )
        }xs";
    $text = preg_replace_callback($reference_link,
        '_DoAnchors_reference_callback', $text);

    #
    # Next, inline-style links: [link text](url "optional title")
    #
    $inline_link = "{
        (               # wrap whole match in $1
          \\[
            ($md_nested_brackets)   # link text = $2
          \\]
          \\(           # literal paren
            [ \\t]*
            <?(.*?)>?   # href = $3
            [ \\t]*
            (           # $4
              (['\"])   # quote char = $5
              (.*?)     # Title = $6
              \\5       # matching quote
            )?          # title is optional
          \\)
        )
        }xs";

    return preg_replace_callback($inline_link,
        '_DoAnchors_inline_callback', $text);
}
function _DoAnchors_reference_callback($matches) {
#
# preg_replace_callback() handler for reference-style links.
#
# Params:
#    $matches - [1] whole match, [2] link text, [3] link id
#
# Returns the <a> tag when the id is known in $md_urls, otherwise the
# whole match untouched.
#
    global $md_urls, $md_titles, $md_escape_table;

    $link_text = $matches[2];
    $link_id   = strtolower($matches[3]);
    if ($link_id == "") {
        # for shortcut links like [this][].
        $link_id = strtolower($link_text);
    }

    # Unknown id: leave the text exactly as it was.
    if (!isset($md_urls[$link_id])) {
        return $matches[1];
    }

    # We've got to encode these to avoid conflicting with italics/bold.
    $plain  = array('*', '_');
    $hashed = array($md_escape_table['*'], $md_escape_table['_']);

    $url = str_replace($plain, $hashed, $md_urls[$link_id]);
    $tag = "<a href=\"$url\"";
    if (isset($md_titles[$link_id])) {
        $title = str_replace($plain, $hashed, $md_titles[$link_id]);
        $tag .= " title=\"$title\"";
    }

    return $tag . ">$link_text</a>";
}
function _DoAnchors_inline_callback($matches) {
#
# preg_replace_callback() handler for inline-style links.
#
# Params:
#    $matches - [2] link text, [3] url, [6] optional title
#
# Returns the <a> tag; a title attribute is added only when the title
# group took part in the match.
#
    global $md_escape_table;

    $link_text = $matches[2];

    # We've got to encode these to avoid conflicting with italics/bold.
    $plain  = array('*', '_');
    $hashed = array($md_escape_table['*'], $md_escape_table['_']);

    $url    = str_replace($plain, $hashed, $matches[3]);
    $result = "<a href=\"$url\"";

    if (isset($matches[6])) {
        # The title is emitted inside a double-quoted attribute, so any
        # literal quote must become an entity. (Replacing '"' with '"'
        # did nothing and let a quote terminate the attribute early.)
        $title = str_replace('"', '&quot;', $matches[6]);
        $title = str_replace($plain, $hashed, $title);
        $result .= " title=\"$title\"";
    }

    $result .= ">$link_text</a>";
    return $result;
}
function _DoImages($text) {
#
# Turn Markdown image shortcuts into <img> tags.
#
# Reference-style images are handled first, then inline images; each
# match is rewritten by the corresponding *_callback function.
#

    #
    # First, handle reference-style labeled images: ![alt text][id]
    #
    $reference_image = '{
        (               # wrap whole match in $1
          !\[
            (.*?)       # alt text = $2
          \]

          [ ]?              # one optional space
          (?:\n[ ]*)?       # one optional newline followed by spaces

          \[
            (.*?)       # id = $3
          \]

        )
        }xs';
    $text = preg_replace_callback($reference_image,
        '_DoImages_reference_callback', $text);

    #
    # Next, handle inline images:  ![alt text](url "optional title")
    # Don't forget: encode * and _
    #
    $inline_image = "{
        (               # wrap whole match in $1
          !\\[
            (.*?)       # alt text = $2
          \\]
          \\(           # literal paren
            [ \\t]*
            <?(\\S+?)>? # src url = $3
            [ \\t]*
            (           # $4
              (['\"])   # quote char = $5
              (.*?)     # title = $6
              \\5       # matching quote
              [ \\t]*
            )?          # title is optional
          \\)
        )
        }xs";

    return preg_replace_callback($inline_image,
        '_DoImages_inline_callback', $text);
}
function _DoImages_reference_callback($matches) {
#
# preg_replace_callback() handler for reference-style images.
#
# Params:
#    $matches - [1] whole match, [2] alt text, [3] link id
#
# Returns the <img> tag when the id is known in $md_urls, otherwise the
# whole match untouched.
#
    global $md_urls, $md_titles, $md_empty_element_suffix, $md_escape_table;

    $whole_match = $matches[1];
    $alt_text    = $matches[2];
    $link_id     = strtolower($matches[3]);

    if ($link_id == "") {
        # for shortcut links like ![this][].
        # (Derived from the raw alt text, before any escaping.)
        $link_id = strtolower($alt_text);
    }

    # The alt text lands inside a double-quoted attribute, so literal
    # quotes must become entities. (Replacing '"' with '"' did nothing.)
    $alt_text = str_replace('"', '&quot;', $alt_text);

    if (isset($md_urls[$link_id])) {
        # We've got to encode these to avoid conflicting with italics/bold.
        $plain  = array('*', '_');
        $hashed = array($md_escape_table['*'], $md_escape_table['_']);

        $url    = str_replace($plain, $hashed, $md_urls[$link_id]);
        $result = "<img src=\"$url\" alt=\"$alt_text\"";
        if (isset($md_titles[$link_id])) {
            $title   = str_replace($plain, $hashed, $md_titles[$link_id]);
            $result .= " title=\"$title\"";
        }
        $result .= $md_empty_element_suffix;
    }
    else {
        # If there's no such link ID, leave intact:
        $result = $whole_match;
    }

    return $result;
}
function _DoImages_inline_callback($matches) {
#
# preg_replace_callback() handler for inline images.
#
# Params:
#    $matches - [2] alt text, [3] src url, [6] optional title
#
# Returns the <img> tag. A title attribute is always written (empty when
# no title was given): $title is initialised to '' up front, so the
# former isset($title) guard could never be false. It has been dropped
# without changing the output.
#
    global $md_empty_element_suffix, $md_escape_table;

    $alt_text = $matches[2];
    $url      = $matches[3];
    $title    = isset($matches[6]) ? $matches[6] : '';

    # Both values land inside double-quoted attributes, so literal quotes
    # must become entities. (Replacing '"' with '"' did nothing.)
    $alt_text = str_replace('"', '&quot;', $alt_text);
    $title    = str_replace('"', '&quot;', $title);

    # We've got to encode these to avoid conflicting with italics/bold.
    $plain  = array('*', '_');
    $hashed = array($md_escape_table['*'], $md_escape_table['_']);
    $url    = str_replace($plain, $hashed, $url);
    $title  = str_replace($plain, $hashed, $title);

    $result  = "<img src=\"$url\" alt=\"$alt_text\"";
    $result .= " title=\"$title\""; # $title already quoted
    $result .= $md_empty_element_suffix;

    return $result;
}
function _DoHeaders($text) {
#
# Converts Setext-style and atx-style headers to <h1>..<h6> elements.
# The header text itself is run through _RunSpanGamut().
#
# The replacements are done with preg_replace_callback() and named
# helpers rather than the /e (eval) pattern modifier, which newer PHP
# versions no longer support. Because the matched text is no longer
# spliced into evaluated code, no quote un-slashing is needed either.
#

    # Setext-style headers:
    #     Header 1
    #     ========
    #
    #     Header 2
    #     --------
    #
    $text = preg_replace_callback('{ ^(.+)[ \t]*\n=+[ \t]*\n+ }mx',
        '_DoHeaders_callback_setext_h1', $text);
    $text = preg_replace_callback('{ ^(.+)[ \t]*\n-+[ \t]*\n+ }mx',
        '_DoHeaders_callback_setext_h2', $text);

    # atx-style headers:
    #   # Header 1
    #   ## Header 2
    #   ## Header 2 with closing hashes ##
    #   ...
    #   ###### Header 6
    #
    $text = preg_replace_callback('{
            ^(\#{1,6})  # $1 = string of hashes
            [ \t]*
            (.+?)       # $2 = Header text
            [ \t]*
            \#*         # optional closing hashes (not counted)
            \n+
        }xm',
        '_DoHeaders_callback_atx', $text);

    return $text;
}
function _DoHeaders_callback_setext_h1($matches) {
# Builds the <h1> for a Setext header underlined with "=".
    return '<h1>' . _RunSpanGamut($matches[1]) . "</h1>\n\n";
}
function _DoHeaders_callback_setext_h2($matches) {
# Builds the <h2> for a Setext header underlined with "-".
    return '<h2>' . _RunSpanGamut($matches[1]) . "</h2>\n\n";
}
function _DoHeaders_callback_atx($matches) {
# Builds the <hN> for an atx header; N is the number of leading hashes.
    $level = strlen($matches[1]);
    return "<h$level>" . _RunSpanGamut($matches[2]) . "</h$level>\n\n";
}
function _DoLists($text) {
#
# Form HTML ordered (numbered) and unordered (bulleted) lists.
#
# Each whole list found is handed to _DoLists_callback().
#
    global $md_tab_width, $md_list_level;
    $less_than_tab = $md_tab_width - 1;

    # Re-usable patterns to match list item bullets and number markers:
    $marker_ul  = '[*+-]';
    $marker_ol  = '\d+[.]';
    $marker_any = "(?:$marker_ul|$marker_ol)";

    # Re-usable pattern to match any entire ul or ol list:
    $whole_list = '
        (                               # $1 = whole list
          (                             # $2
            [ ]{0,' . $less_than_tab . '}
            (' . $marker_any . ')           # $3 = first list item marker
            [ \t]+
          )
          (?s:.+?)
          (                             # $4
              \z
            |
              \n{2,}
              (?=\S)
              (?!                       # Negative lookahead for another list item marker
                [ \t]*
                ' . $marker_any . '[ \t]+
              )
          )
        )
    '; // mx

    # We use a different prefix before nested lists than top-level lists.
    # See extended comment in _ProcessListItems().
    # Inside a list ($md_list_level is non-zero) a list may start on any
    # line; at top level it must follow a blank line or start the document.
    if ($md_list_level) {
        $prefix = '^';
    }
    else {
        $prefix = '(?:(?<=\n\n)|\A\n?)';
    }

    $pattern = '{
            ' . $prefix . '
            ' . $whole_list . '
        }mx';

    return preg_replace_callback($pattern, '_DoLists_callback', $text);
}
function _DoLists_callback($matches) {
#
# preg_replace_callback() handler for _DoLists().
#
# Params:
#    $matches - [1] the whole list, [3] its first item marker
#
# Returns the list wrapped in <ul> or <ol>, chosen from the first marker,
# with its items processed by _ProcessListItems().
#
    # Re-usable patterns to match list item bullets and number markers:
    $marker_ul  = '[*+-]';
    $marker_ol  = '\d+[.]';
    $marker_any = "(?:$marker_ul|$marker_ol)";

    $tag = preg_match("/$marker_ul/", $matches[3]) ? "ul" : "ol";

    # Turn double returns into triple returns, so that we can make a
    # paragraph for the last item in a list, if necessary:
    $list_str = preg_replace("/\n{2,}/", "\n\n\n", $matches[1]);

    $items = _ProcessListItems($list_str, $marker_any);

    return "<$tag>\n" . $items . "</$tag>\n";
}
2004-09-04 16:48:57 -04:00
function _ProcessListItems($list_str, $marker_any) {
#
#   Process the contents of a single ordered or unordered list, splitting it
#   into individual list items.
#
#   Params:
#      $list_str   - text of the whole list
#      $marker_any - regex fragment matching any list item marker
#
    global $md_list_level;

    # The $md_list_level global keeps track of when we're inside a list.
    # Each time we enter a list, we increment it; when we leave a list,
    # we decrement. If it's zero, we're not in a list anymore.
    #
    # We do this because when we're not inside a list, we want to treat
    # something like this:
    #
    #       I recommend upgrading to version
    #       8. Oops, now this line is treated
    #       as a sub-list.
    #
    # As a single paragraph, despite the fact that the second line starts
    # with a digit-period-space sequence.
    #
    # Whereas when we're inside a list (or sub-list), that line will be
    # treated as the start of a sub-list. What a kludge, huh? This is
    # an aspect of Markdown's syntax that's hard to parse perfectly
    # without resorting to mind-reading. Perhaps the solution is to
    # change the syntax rules such that sub-lists must start with a
    # starting cardinal number; e.g. "1." or "a.".
    $md_list_level++;

    # trim trailing blank lines:
    $trimmed = preg_replace("/\n{2,}\\z/", "\n", $list_str);

    $item_pattern = '{
        (\n)?                           # leading line = $1
        (^[ \t]*)                       # leading whitespace = $2
        (' . $marker_any . ') [ \t]+        # list marker = $3
        ((?s:.+?)                       # list item text   = $4
        (\n{1,2}))
        (?= \n* (\z | \2 (' . $marker_any . ') [ \t]+))
        }xm';
    $items = preg_replace_callback($item_pattern,
        '_ProcessListItems_callback', $trimmed);

    $md_list_level--;
    return $items;
}
function _ProcessListItems_callback($matches) {
#
# preg_replace_callback() handler for _ProcessListItems().
#
# Params:
#    $matches - [1] optional leading blank line, [4] list item text
#
# Returns the item wrapped in <li> tags.
#
    $body = _Outdent($matches[4]);

    # An item preceded by a blank line, or containing one, is treated as
    # block-level content; any other item is span-level.
    $is_block = !empty($matches[1]) || preg_match('/\n{2,}/', $matches[4]);

    if ($is_block) {
        $body = _RunBlockGamut($body);
    }
    else {
        # Recursion for sub-lists:
        $body = rtrim(_DoLists($body), "\n");
        $body = _RunSpanGamut($body);
    }

    return "<li>" . $body . "</li>\n";
}
function _DoCodeBlocks($text) {
#
#   Process Markdown `<pre><code>` blocks.
#
#   A code block is one or more lines that each start with a tab or with
#   $md_tab_width spaces, preceded by a blank line or the start of the
#   document. Each block found is handed to _DoCodeBlocks_callback().
#
    global $md_tab_width;

    # The tab width is spliced in by concatenation. Writing it as
    # "\{$md_tab_width}" inside a double-quoted string leaves the backslash
    # in place on current PHP versions, turning the quantifier into a
    # literal "{4}" so that space-indented code blocks are never matched.
    $pattern = '{
            (?:\n\n|\A)
            (               # $1 = the code block -- one or more lines, starting with a space/tab
              (?:
                (?:[ ]{' . $md_tab_width . '} | \t)  # Lines must start with a tab or a tab-width of spaces
                .*\n+
              )+
            )
            ((?=^[ ]{0,' . $md_tab_width . '}\S)|\Z)    # Lookahead for non-space at line-start, or end of doc
        }xm';

    return preg_replace_callback($pattern, '_DoCodeBlocks_callback', $text);
}
function _DoCodeBlocks_callback($matches) {
#
# preg_replace_callback() handler for _DoCodeBlocks().
#
# Outdents and encodes the matched block ($matches[1]) and returns it
# wrapped in <pre><code>, surrounded by blank lines.
#
    $code = _EncodeCode(_Outdent($matches[1]));

    # trim leading newlines and trailing whitespace
    $code = preg_replace('/\A\n+/', '', $code);
    $code = preg_replace('/\s+\z/', '', $code);

    return "\n\n<pre><code>" . $code . "\n</code></pre>\n\n";
}
function _DoCodeSpans($text) {
#
#   *   Backtick quotes are used for <code></code> spans.
#
#   *   You can use multiple backticks as the delimiters if you want to
#       include literal backticks in the code span. So, this input:
#
#         Just type ``foo `bar` baz`` at the prompt.
#
#       Will translate to:
#
#         <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
#
#       There's no arbitrary limit to the number of backticks you
#       can use as delimiters. If you need three consecutive backticks
#       in your code, use four for delimiters, etc.
#
#   *   You can use spaces to get literal backticks at the edges:
#
#         ... type `` `bar` `` ...
#
#       Turns to:
#
#         ... type <code>`bar`</code> ...
#
    $code_span = "@
            (`+)        # $1 = Opening run of `
            (.+?)       # $2 = The code block
            (?<!`)
            \\1
            (?!`)
        @xs";

    return preg_replace_callback($code_span, '_DoCodeSpans_callback', $text);
}
function _DoCodeSpans_callback($matches) {
#
# preg_replace_callback() handler for _DoCodeSpans().
#
# Strips spaces and tabs from both edges of the span content
# ($matches[2]), encodes it, and wraps it in <code> tags.
#
    $edges = array(
        '/^[ \t]*/',    # leading whitespace
        '/[ \t]*$/',    # trailing whitespace
    );
    $code = preg_replace($edges, '', $matches[2]);

    return "<code>" . _EncodeCode($code) . "</code>";
}
function _EncodeCode($_) {
#
# Encode/escape certain characters inside Markdown code runs.
# The point is that in code, these characters are literals,
# and lose their special Markdown meanings.
#
# Params:
#    $_ - raw text of a code span or code block
#
# Returns the text with &, < and > turned into HTML entities and every
# key of $md_escape_table replaced by its hash value.
#
    global $md_escape_table;

    # Encode all ampersands; HTML entities are not
    # entities within a Markdown code span.
    # This must run first, so the entities produced below are not
    # themselves re-encoded.
    $_ = str_replace('&', '&amp;', $_);

    # Do the angle bracket song and dance:
    $_ = str_replace(array('<',    '>'),
                     array('&lt;', '&gt;'), $_);

    # Now, escape characters that are magic in Markdown:
    $_ = str_replace(array_keys($md_escape_table),
                     array_values($md_escape_table), $_);

    return $_;
}
function _DoItalicsAndBold($text) {
#
# Converts **text** / __text__ to <strong> and *text* / _text_ to <em>.
#
    # preg_replace() applies the patterns in array order, so
    # <strong> goes first, then <em>.
    $patterns = array(
        '{ (\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1 }sx',
        '{ (\*|_) (?=\S) (.+?) (?<=\S) \1 }sx',
    );
    $replacements = array(
        '<strong>\2</strong>',
        '<em>\2</em>',
    );

    return preg_replace($patterns, $replacements, $text);
}
function _DoBlockQuotes($text) {
    #
    # Locates every run of blockquote material (a line introduced by ">",
    # the non-blank lines that follow it, and any blank lines after those,
    # repeated as often as possible) and lets _DoBlockQuotes_callback
    # produce the replacement for the whole run.
    #
    # Parameter: $text - block-level text.
    # Returns:   The text with each such run replaced by the callback result.
    #
    # The pattern runs in extended (x) mode, so its layout is free-form and
    # the "#" parts are regex comments; m makes ^ match at every line start.
    $blockquote_run = ' /
( # Wrap whole match in $1
(
^ [ \t ] *> [ \t ] ? # ">" at the start of a line
.+ \n # rest of the first line
( .+ \n ) * # subsequent consecutive lines
\n * # blanks
) +
)
/ xm ';

    return preg_replace_callback($blockquote_run,
                                 '_DoBlockQuotes_callback', $text);
}
function _DoBlockQuotes_callback($matches) {
    #
    # preg_replace_callback handler for one run of blockquote lines.
    #
    # Parameter: $matches - regex match array; $matches[1] is the raw
    #            blockquote text, ">" markers included.
    # Returns:   The text with one level of quoting removed, processed by
    #            _RunBlockGamut, indented and wrapped in <blockquote> tags.
    #
    $bq = $matches[1];

    # trim one level of quoting - trim whitespace-only lines
    $bq = preg_replace(array('/^[ \t]*>[ \t]?/m', '/^[ \t]+$/m'), '', $bq);

    $bq = _RunBlockGamut($bq); # recurse

    # Indent the generated markup by two spaces per line. The width must
    # stay in sync with the pattern in _DoBlockQuotes_callback2.
    $bq = preg_replace('/^/m', '  ', $bq);

    # These leading spaces screw with <pre> content, so we need to fix that:
    $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
                                '_DoBlockQuotes_callback2', $bq);

    return "<blockquote>\n$bq\n</blockquote>\n\n";
}

function _DoBlockQuotes_callback2($matches) {
    #
    # Undoes the blockquote indentation inside a <pre>...</pre> section,
    # where leading spaces would otherwise be rendered literally.
    #
    # Parameter: $matches - regex match array; $matches[1] is the <pre>
    #            section together with the whitespace preceding it.
    # Returns:   The section with the two-space indent removed from the
    #            start of every line.
    #
    return preg_replace('/^  /m', '', $matches[1]);
}
2004-04-18 23:50:43 -04:00
function _FormParagraphs($text) {
    #
    # Params:
    #   $text - string to process with html <p> tags
    #
    # Returns:
    #   The text split into paragraphs on runs of two or more newlines.
    #   A paragraph that is a key of $md_html_blocks is replaced by the
    #   stored block; every other paragraph goes through _RunSpanGamut
    #   and is wrapped in <p>...</p>. Paragraphs are joined by one blank
    #   line.
    #
    global $md_html_blocks;

    # Strip leading and trailing lines:
    $text = preg_replace(array('/\A\n+/', '/\n+\z/'), '', $text);

    $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

    foreach ($grafs as $key => $value) {
        if (isset($md_html_blocks[$value])) {
            # Unhashify HTML blocks
            $grafs[$key] = $md_html_blocks[$value];
        }
        else {
            # Wrap <p> tags; the opening tag replaces any leading
            # spaces or tabs of the paragraph.
            $value = _RunSpanGamut($value);
            $value = preg_replace('/^([ \t]*)/', '<p>', $value);
            $grafs[$key] = $value . '</p>';
        }
    }

    return implode("\n\n", $grafs);
}
function _EncodeAmpsAndAngles($text) {
    #
    # Smart processing for ampersands and angle brackets that need to be encoded.
    #
    # Parameter: $text - text that may contain HTML tags and entities.
    # Returns:   The text with every "&" that does not begin an entity
    #            replaced by "&amp;", and every "<" that is not followed by
    #            a letter, "/", "?", "$" or "!" replaced by "&lt;".
    #

    # Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
    #   http://bumppo.net/projects/amputator/
    $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
                         '&amp;', $text);

    # Encode naked <'s
    $text = preg_replace('{<(?![a-z/?\$!])}i', '&lt;', $text);

    return $text;
}
function _EncodeBackslashEscapes($text) {
    #
    # Parameter: String.
    # Returns:   The string, after replacing every backslash escape
    #            sequence listed as a key of $md_backslash_escape_table
    #            with the corresponding table value.
    #
    global $md_backslash_escape_table;

    # Must process escaped backslashes first. str_replace applies the
    # pairs in array order, so this relies on the table listing the
    # double-backslash entry before the others.
    return str_replace(array_keys($md_backslash_escape_table),
                       array_values($md_backslash_escape_table), $text);
}
function _DoAutoLinks($text) {
    #
    # Turns <http://...>, <https://...>, <ftp://...> and <address@domain.foo>
    # (optionally prefixed with "mailto:") into HTML links.
    #
    # Parameter: $text - span-level text.
    # Returns:   The text with automatic links converted to <a> elements.
    #
    $text = preg_replace('!<((https?|ftp):[^\'">\s]+)>!',
                         '<a href="\1">\1</a>', $text);

    # Email addresses: <address@domain.foo>
    # A callback is used instead of the "e" (eval) pattern modifier, which
    # was removed in PHP 7. Callback arguments are not slash-escaped, so
    # the match needs no _UnslashQuotes treatment either.
    $text = preg_replace_callback('{
        <
        (?:mailto:)?
        (
            [-.\w]+
            \@
            [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
        )
        >
        }xi',
        '_DoAutoLinks_callback', $text);

    return $text;
}

function _DoAutoLinks_callback($matches) {
    #
    # Builds the obfuscated mailto link for one matched email address.
    # $matches[1] is the address without the angle brackets and without
    # the optional "mailto:" prefix.
    #
    return _EncodeEmailAddress(_UnescapeSpecialChars($matches[1]));
}
function _EncodeEmailAddress($addr) {
    #
    # Input: an email address, e.g. "foo@example.com"
    #
    # Output: the email address as a mailto link, with each character
    #   of the address encoded as either a decimal or hex entity, in
    #   the hopes of foiling most address harvesting spam bots. E.g.
    #   (the mix of raw, hex and decimal forms differs on every call):
    #
    #     <a href="&#x6D;&#97;&#105;&#108;&#x74;&#111;:&#102;&#111;&#111;&#64;&#101;
    #       x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;">&#102;&#111;&#111;
    #       &#64;&#101;x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;</a>
    #
    # Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
    # mailing list: <http://tinyurl.com/yu7ue>
    #
    $addr = 'mailto:' . $addr;

    # leave ':' alone (to spot mailto: later)
    $addr = preg_replace_callback('/([^\:])/',
                                  '_EncodeEmailAddress_callback', $addr);

    $addr = "<a href=\"$addr\">$addr</a>";

    # strip the mailto: from the visible part
    $addr = preg_replace('/">.+?:/', '">', $addr);

    return $addr;
}
function _EncodeEmailAddress_callback($matches) {
    #
    # preg_replace_callback handler that obfuscates a single character.
    #
    # Parameter: $matches - regex match array; $matches[1] is the character.
    # Returns:   The character itself, its hexadecimal entity (&#x..;) or
    #            its decimal entity (&#..;), chosen at random.
    #
    $char = $matches[1];
    $r = rand(0, 100);

    # roughly 10% raw, 45% hex, 45% dec
    # '@' *must* be encoded. I insist.
    if ($r > 90 && $char !== '@') return $char;
    if ($r < 45) return '&#x' . dechex(ord($char)) . ';';
    return '&#' . ord($char) . ';';
}
function _UnescapeSpecialChars($text) {
    #
    # Swap back in all the special characters we've hidden.
    #
    # Parameter: $text - text in which special characters were replaced by
    #            the values of $md_escape_table.
    # Returns:   The text with every table value replaced by its key
    #            (the reverse of the substitution done when escaping).
    #
    global $md_escape_table;

    return str_replace(array_values($md_escape_table),
                       array_keys($md_escape_table), $text);
}
2005-01-24 01:30:16 -05:00
# _TokenizeHTML is shared between PHP Markdown and PHP SmartyPants.
# We only define it if it is not already defined.
if (!function_exists('_TokenizeHTML')) :
function _TokenizeHTML($str) {
    #
    # Parameter: String containing HTML markup.
    # Returns:   An array of the tokens comprising the input
    #            string. Each token is either a tag (possibly with nested
    #            tags contained therein, such as <a href="<MTFoo>">), or a
    #            run of text between tags. Each element of the array is a
    #            two-element array; the first is either 'tag' or 'text';
    #            the second is the actual value.
    #            Empty runs of text (before a leading tag, after a trailing
    #            tag or between adjacent tags) produce no token.
    #
    # Regular expression derived from the _tokenize() subroutine in
    # Brad Choate's MTRegex plugin.
    # <http://www.bradchoate.com/past/mtregex.php>
    #
    $match = '(?s:<!(?:--.*?--\s*)+>)|' .  # comment
             '(?s:<\?.*?\?>)|' .           # processing instruction
             '(?:</?[\w:$]+\b(?>[^"\'>]+|"[^"]*"|\'[^\']*\')*>)'; # regular tags

    # The single capturing group makes preg_split return the delimiters
    # too, so the result alternates text, tag, text, tag, ... text.
    $parts = preg_split('{(' . $match . ')}', $str, -1,
                        PREG_SPLIT_DELIM_CAPTURE);

    if ($parts === false) {
        # The regex engine gave up (e.g. backtrack limit reached):
        # hand the input back as one text token rather than losing it.
        return ($str != '') ? array(array('text', $str)) : array();
    }

    $tokens = array();
    foreach ($parts as $index => $part) {
        if ($index % 2) {
            # Odd positions hold the captured delimiters, i.e. the tags.
            $tokens[] = array('tag', $part);
        }
        elseif ($part != '') {
            # Even positions hold the text between tags; skip empty runs
            # instead of reporting them as empty 'tag' tokens.
            $tokens[] = array('text', $part);
        }
    }

    return $tokens;
}
endif;
2004-04-18 23:50:43 -04:00
function _Outdent($text) {
    #
    # Remove one level of line-leading tabs or spaces
    #
    # Parameter: $text - possibly multi-line text.
    # Returns:   The text where each line has lost either one leading tab
    #            or up to $md_tab_width leading spaces.
    #
    global $md_tab_width;

    return preg_replace('/^(\t|[ ]{1,' . $md_tab_width . '})/m', '', $text);
}
function _Detab($text) {
    #
    # Replace tabs with the appropriate amount of space.
    #
    # Parameter: $text - possibly multi-line text.
    # Returns:   The text with every tab expanded to the next multiple of
    #            $md_tab_width columns. Every line of the result, the last
    #            one included, is terminated by "\n".
    #
    global $md_tab_width;

    # For each line we separate the line in blocks delimited by
    # tab characters. Then we reconstruct the line adding the appropriate
    # number of space characters.
    $lines = explode("\n", $text);
    $text = '';

    foreach ($lines as $line) {
        # Split in blocks.
        $blocks = explode("\t", $line);
        # Add each block to the line.
        $line = $blocks[0];
        unset($blocks[0]); # Do not add first block twice.
        foreach ($blocks as $block) {
            # Calculate amount of space, insert spaces, insert block.
            # Column positions are counted in bytes (strlen).
            $amount = $md_tab_width - strlen($line) % $md_tab_width;
            $line .= str_repeat(' ', $amount) . $block;
        }
        $text .= "$line\n";
    }

    return $text;
}
function _UnslashQuotes($text) {
    #
    # This function is useful to remove automatically slashed double quotes
    # when using preg_replace and evaluating an expression.
    # Parameter: String.
    # Returns:   The string with any slash-double-quote (\") sequence replaced
    #            by a single double quote.
    #
    return str_replace('\"', '"', $text);
}
2005-01-24 01:30:16 -05:00
/*
PHP Markdown
============
Description
-----------
This is a PHP translation of the original Markdown formatter written in
Perl by John Gruber.
Markdown is a text-to-HTML filter; it translates an easy-to-read /
easy-to-write structured text format into HTML. Markdown's text format
is most similar to that of plain text email, and supports features such
as headers, *emphasis*, code blocks, blockquotes, and links.
Markdown's syntax is designed not as a generic markup language, but
specifically to serve as a front-end to (X)HTML. You can use span-level
HTML tags anywhere in a Markdown document, and you can use block level
HTML tags (like <div> and <table>) as well.
For more information about Markdown's syntax, see:
<http://daringfireball.net/projects/markdown/>
Bugs
----
To file bug reports please send email to:
<michel.fortin@michelf.com>
Please include with your report: (1) the example input; (2) the output you
expected; (3) the output Markdown actually produced.
Version History
---------------
See the readme file for detailed release notes for this version .
1.0.1 - 17 Dec 2004
1.0 - 21 Aug 2004
Author & Contributors
---------------------
Original Perl version by John Gruber
<http://daringfireball.net/>
PHP port and other contributions by Michel Fortin
<http://www.michelf.com/>
Copyright and License
---------------------
Copyright (c) 2004 Michel Fortin
<http://www.michelf.com/>
All rights reserved.
Copyright (c) 2003-2004 John Gruber
<http://daringfireball.net/>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name "Markdown" nor the names of its contributors may
be used to endorse or promote products derived from this software
without specific prior written permission.
This software is provided by the copyright holders and contributors "as
is" and any express or implied warranties, including, but not limited
to, the implied warranties of merchantability and fitness for a
particular purpose are disclaimed. In no event shall the copyright owner
or contributors be liable for any direct, indirect, incidental, special,
exemplary, or consequential damages (including, but not limited to,
procurement of substitute goods or services; loss of use, data, or
profits; or business interruption) however caused and on any theory of
liability, whether in contract, strict liability, or tort (including
negligence or otherwise) arising in any way out of the use of this
software, even if advised of the possibility of such damage.
*/
2004-04-18 23:50:43 -04:00
?>