2004-07-30 04:02:58 +00:00
< ? php
2009-05-26 02:46:09 +00:00
/**
*
* Markdown Extra - A text - to - HTML conversion tool for web writers
*
* PHP Markdown & Extra
* Copyright ( c ) 2004 - 2007 Michel Fortin
* < http :// www . michelf . com / projects / php - markdown />
*
* Original Markdown
* Copyright ( c ) 2004 - 2006 John Gruber
* < http :// daringfireball . net / projects / markdown />
*
* @ package moodlecore
* @ copyright ( c ) 2004 - 2006 John Gruber
*/
/** MARKDOWN_VERSION = 1.0.1j */
2007-09-17 12:37:51 +00:00
define ( 'MARKDOWN_VERSION' , " 1.0.1j " ); # Tue 4 Sep 2007
2009-05-26 02:46:09 +00:00
/** MARKDOWNEXTRA_VERSION = 1.1.6 */
2007-09-17 12:37:51 +00:00
define ( 'MARKDOWNEXTRA_VERSION' , " 1.1.6 " ); # Tue 4 Sep 2007
2004-07-30 04:02:58 +00:00
#
# Global default settings:
#
# Every setting below goes through an error-suppressed define(), so a
# constant that is already defined when this file is loaded keeps its value
# and the failed redefinition stays silent.
#
2007-02-08 11:23:25 +00:00
2009-05-26 02:46:09 +00:00
/** Change to ">" for HTML output */
2007-09-17 12:37:51 +00:00
@ define ( 'MARKDOWN_EMPTY_ELEMENT_SUFFIX' , " /> " );
2007-02-08 11:23:25 +00:00
2009-05-26 02:46:09 +00:00
/** Define the width of a tab for code blocks. */
2007-09-17 12:37:51 +00:00
@ define ( 'MARKDOWN_TAB_WIDTH' , 4 );
2007-02-08 11:23:25 +00:00
2009-05-26 02:46:09 +00:00
/** Optional title attribute for footnote links and backlinks. */
2007-09-17 12:37:51 +00:00
@ define ( 'MARKDOWN_FN_LINK_TITLE' , " " );
@ define ( 'MARKDOWN_FN_BACKLINK_TITLE' , " " );
2007-02-08 11:23:25 +00:00
# Optional class attribute for footnote links and backlinks.
2007-09-17 12:37:51 +00:00
@ define ( 'MARKDOWN_FN_LINK_CLASS' , " " );
@ define ( 'MARKDOWN_FN_BACKLINK_CLASS' , " " );
2007-02-08 11:23:25 +00:00
2004-07-30 04:02:58 +00:00
2006-02-25 16:48:46 +00:00
#
# WordPress settings:
#
2007-02-08 11:23:25 +00:00
# Change to false to remove Markdown from posts and/or comments.
# These two flags gate the filter registration done in the WordPress
# plugin section further down.
2007-09-17 12:37:51 +00:00
@ define ( 'MARKDOWN_WP_POSTS' , true );
@ define ( 'MARKDOWN_WP_COMMENTS' , true );
2007-02-08 11:23:25 +00:00
### Standard Function Interface ###
# Name of the class that Markdown() instantiates to do the conversion.
2007-09-17 12:37:51 +00:00
@ define ( 'MARKDOWN_PARSER_CLASS' , 'MarkdownExtra_Parser' );
2007-02-08 11:23:25 +00:00
function Markdown($text) {
#
# Convert $text with a parser of class MARKDOWN_PARSER_CLASS and return
# whatever its transform() method returns.
#
	# The parser is built on the first call only and kept in a static
	# variable for every later call.
	static $shared_parser = null;

	if ($shared_parser === null) {
		$class_name = MARKDOWN_PARSER_CLASS;
		$shared_parser = new $class_name;
	}

	$html = $shared_parser->transform($text);
	return $html;
}
### WordPress Plugin Interface ###
2004-07-30 04:02:58 +00:00
/*
2007-02-08 11:23:25 +00:00
Plugin Name : Markdown Extra
2004-07-30 04:02:58 +00:00
Plugin URI : http :// www . michelf . com / projects / php - markdown /
2004-08-29 15:46:22 +00:00
Description : < a href = " http://daringfireball.net/projects/markdown/syntax " > Markdown syntax </ a > allows you to write using an easy - to - read , easy - to - write plain text format . Based on the original Perl version by < a href = " http://daringfireball.net/ " > John Gruber </ a >. < a href = " http://www.michelf.com/projects/php-markdown/ " > More ...</ a >
2007-09-17 12:37:51 +00:00
Version : 1.1 . 6
2004-07-30 04:02:58 +00:00
Author : Michel Fortin
Author URI : http :// www . michelf . com /
*/
2007-02-08 11:23:25 +00:00
2004-07-30 04:02:58 +00:00
if ( isset ( $wp_version )) {
2006-02-25 16:48:46 +00:00
# More details about how it works here:
# <http://www.michelf.com/weblog/2005/wordpress-text-flow-vs-markdown/>
# Post content and excerpts
2007-02-08 11:23:25 +00:00
# - Remove WordPress paragraph generator.
# - Run Markdown on excerpt, then remove all tags.
# - Add paragraph tag around the excerpt, but remove it for the excerpt rss.
if ( MARKDOWN_WP_POSTS ) {
# Unhook wpautop from post content, RSS content and excerpts.
2007-08-21 09:54:49 +00:00
remove_filter ( 'the_content' , 'wpautop' );
remove_filter ( 'the_content_rss' , 'wpautop' );
remove_filter ( 'the_excerpt' , 'wpautop' );
# Hook Markdown() at priority 6 on content, RSS content and excerpts;
# the excerpt is then passed through trim() at priority 7.
2006-02-25 16:48:46 +00:00
add_filter ( 'the_content' , 'Markdown' , 6 );
2007-08-21 09:54:49 +00:00
add_filter ( 'the_content_rss' , 'Markdown' , 6 );
2006-02-25 16:48:46 +00:00
add_filter ( 'get_the_excerpt' , 'Markdown' , 6 );
add_filter ( 'get_the_excerpt' , 'trim' , 7 );
# mdwp_add_p wraps the displayed excerpt in paragraph tags;
# mdwp_strip_p removes them again for the RSS excerpt.
2007-02-08 11:23:25 +00:00
add_filter ( 'the_excerpt' , 'mdwp_add_p' );
add_filter ( 'the_excerpt_rss' , 'mdwp_strip_p' );
# Take balanceTags off the *_save_pre hooks and attach it to
# the_content (priority 50) and get_the_excerpt (priority 9) instead.
2006-02-25 16:48:46 +00:00
remove_filter ( 'content_save_pre' , 'balanceTags' , 50 );
remove_filter ( 'excerpt_save_pre' , 'balanceTags' , 50 );
add_filter ( 'the_content' , 'balanceTags' , 50 );
add_filter ( 'get_the_excerpt' , 'balanceTags' , 9 );
}
# Comments
2007-02-08 11:23:25 +00:00
# - Remove WordPress paragraph generator.
# - Remove WordPress auto-link generator.
# - Scramble important tags before passing them to the kses filter.
# - Run Markdown on excerpt then remove paragraph tags.
if ( MARKDOWN_WP_COMMENTS ) {
# Unhook wpautop and make_clickable from comment text.
2007-08-21 09:54:49 +00:00
remove_filter ( 'comment_text' , 'wpautop' , 30 );
2006-02-25 16:48:46 +00:00
remove_filter ( 'comment_text' , 'make_clickable' );
# On pre_comment_content: Markdown() runs at priority 6, mdwp_hide_tags
# swaps the listed tags for placeholders at priority 8, and mdwp_show_tags
# swaps them back at priority 12.
add_filter ( 'pre_comment_content' , 'Markdown' , 6 );
2007-02-08 11:23:25 +00:00
add_filter ( 'pre_comment_content' , 'mdwp_hide_tags' , 8 );
add_filter ( 'pre_comment_content' , 'mdwp_show_tags' , 12 );
2006-02-25 16:48:46 +00:00
add_filter ( 'get_comment_text' , 'Markdown' , 6 );
add_filter ( 'get_comment_excerpt' , 'Markdown' , 6 );
2007-02-08 11:23:25 +00:00
add_filter ( 'get_comment_excerpt' , 'mdwp_strip_p' , 7 );
2006-02-25 16:48:46 +00:00
# Parallel lists used by mdwp_hide_tags / mdwp_show_tags: entry N of
# $mdwp_placeholders stands in for entry N of $mdwp_hidden_tags. The
# placeholder strings are stored rot13-encoded and decoded here.
2007-08-21 09:54:49 +00:00
global $mdwp_hidden_tags , $mdwp_placeholders ;
$mdwp_hidden_tags = explode ( ' ' ,
'<p> </p> <pre> </pre> <ol> </ol> <ul> </ul> <li> </li>' );
$mdwp_placeholders = explode ( ' ' , str_rot13 (
'pEj07ZbbBZ U1kqgh4w4p pre2zmeN6K QTi31t9pre ol0MP1jzJR ' .
'ML5IjmbRol ulANi1NsGY J7zRLJqPul liA8ctl16T K9nhooUHli' ));
2007-02-08 11:23:25 +00:00
}
function mdwp_add_p($text) {
	# Text that is empty, or that already opens with one of the listed
	# block-level tags, is returned as is.
	if (preg_match('{^$|^<(p|ul|ol|dl|pre|blockquote)>}i', $text)) {
		return $text;
	}

	# Otherwise wrap everything in one paragraph, then split that paragraph
	# wherever two or more consecutive newlines occur.
	$wrapped = '<p>' . $text . '</p>';
	return preg_replace('{\n{2,}}', " </p> \n \n <p> ", $wrapped);
}
function mdwp_strip_p($t) {
	# Remove every opening and closing <p> tag (any letter case) from $t.
	$paragraph_tag = '{</?p>}i';
	return preg_replace($paragraph_tag, '', $t);
}
function mdwp_hide_tags($text) {
	# Swap each tag listed in the global $mdwp_hidden_tags for the entry at
	# the same position in the global $mdwp_placeholders.
	global $mdwp_hidden_tags, $mdwp_placeholders;

	$scrambled = str_replace($mdwp_hidden_tags, $mdwp_placeholders, $text);
	return $scrambled;
}
function mdwp_show_tags($text) {
	# Reverse of mdwp_hide_tags(): swap each entry of the global
	# $mdwp_placeholders for the tag at the same position in the global
	# $mdwp_hidden_tags.
	global $mdwp_hidden_tags, $mdwp_placeholders;

	$restored = str_replace($mdwp_placeholders, $mdwp_hidden_tags, $text);
	return $restored;
}
2004-07-30 04:02:58 +00:00
}
2005-04-11 14:08:01 +00:00
2007-02-08 11:23:25 +00:00
### bBlog Plugin Info ###
2004-08-29 15:46:22 +00:00
function identify_modifier_markdown() {
	# Describe this file as a bBlog "modifier" plugin. The version entry is
	# taken from MARKDOWNEXTRA_VERSION.
	$info = array();
	$info['name']        = 'markdown';
	$info['type']        = 'modifier';
	$info['nicename']    = 'PHP Markdown Extra';
	$info['description'] = 'A text-to-HTML conversion tool for web writers';
	$info['authors']     = 'Michel Fortin and John Gruber';
	$info['licence']     = 'GPL';
	$info['version']     = MARKDOWNEXTRA_VERSION;
	$info['help']        = '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>';
	return $info;
}
2007-02-08 11:23:25 +00:00
### Smarty Modifier Interface ###
2004-08-29 15:46:22 +00:00
function smarty_modifier_markdown($text) {
	# Smarty modifier entry point: hands $text to Markdown() and returns
	# its result unchanged.
	$converted = Markdown($text);
	return $converted;
}
2007-02-08 11:23:25 +00:00
### Textile Compatibility Mode ###
# Rename this file to "classTextile.php" and it can replace Textile everywhere.
2004-07-30 04:02:58 +00:00
# The last 16 characters of this file's path are compared, ignoring case,
# with the 16-character name "classTextile.php". The literal must not carry
# any padding, otherwise the comparison can never succeed.
if (strcasecmp(substr(__FILE__, -16), "classTextile.php") == 0) {
	# Try to include PHP SmartyPants. Should be in the same directory.
	# The include is error-suppressed, so a missing file is not reported.
	@include_once 'smartypants.php';

	/**
	 * Fake Textile class. It calls Markdown instead.
	 *
	 * @package moodlecore
	 * @copyright (c) 2004-2006 John Gruber
	 */
	class Textile {
		# Runs Markdown() on $text when both $lite and $encode are empty,
		# then SmartyPants() when that function exists. Returns the text.
		function TextileThis($text, $lite = '', $encode = '') {
			if ($lite == '' && $encode == '') $text = Markdown($text);
			if (function_exists('SmartyPants')) $text = SmartyPants($text);
			return $text;
		}

		# Fake restricted version: restrictions are not supported for now.
		# $noimage is accepted but ignored.
		function TextileRestricted($text, $lite = '', $noimage = '') {
			return $this->TextileThis($text, $lite);
		}

		# Workaround to ensure compatibility with TextPattern 4.0.3.
		function blockLite($text) { return $text; }
	}
}
2004-08-29 15:46:22 +00:00
2009-05-26 02:46:09 +00:00
/**
* Markdown Parser Class
*
* @ package moodlecore
* @ copyright ( c ) 2004 - 2006 John Gruber
*/
2004-07-30 04:02:58 +00:00
2007-02-08 11:23:25 +00:00
class Markdown_Parser {
2004-07-30 04:02:58 +00:00
2007-02-08 11:23:25 +00:00
# Regex to match balanced [brackets].
# Needed to insert a maximum bracket depth while converting to PHP.
var $nested_brackets_depth = 6 ;
var $nested_brackets ;
2007-08-21 09:54:49 +00:00
var $nested_url_parenthesis_depth = 4 ;
var $nested_url_parenthesis ;
2004-07-30 04:02:58 +00:00
2007-02-08 11:23:25 +00:00
# Table of hash values for escaped characters:
var $escape_chars = '\`*_{}[]()>#+-.!' ;
2006-02-25 16:48:46 +00:00
2007-02-08 11:23:25 +00:00
# Change to ">" for HTML output.
var $empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX ;
var $tab_width = MARKDOWN_TAB_WIDTH ;
2007-08-21 09:54:49 +00:00
# Change to `true` to disallow markup or entities.
var $no_markup = false ;
var $no_entities = false ;
2004-07-30 04:02:58 +00:00
2007-02-08 11:23:25 +00:00
function __construct() {
#
# Constructor. Initialize appropriate member variables.
#
# PHP 8 no longer treats a method named after its class as a constructor,
# so the initialisation lives here; Markdown_Parser() below is kept so that
# existing explicit calls to the old name keep working.
#
	$this->_initDetab();

	# Build the "balanced brackets" pattern by nesting the same fragment
	# $nested_brackets_depth times.
	$this->nested_brackets =
		str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth) .
		str_repeat('\])*', $this->nested_brackets_depth);

	# Same construction for parentheses inside URLs.
	$this->nested_url_parenthesis =
		str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth) .
		str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth);

	# Sort document, block, and span gamut in ascending priority order.
	asort($this->document_gamut);
	asort($this->block_gamut);
	asort($this->span_gamut);
}

function Markdown_Parser() {
#
# Old-style constructor name, kept for backward compatibility.
# NOTE(review): subclasses presumably call parent::Markdown_Parser();
# confirm against the rest of the file.
#
	# self:: (not $this->) so a subclass overriding __construct() cannot
	# cause this call to recurse back into itself.
	self::__construct();
}
2004-08-29 15:46:22 +00:00
2004-07-30 04:02:58 +00:00
2007-02-08 11:23:25 +00:00
# Internal hashes used during transformation.
var $urls = array ();
var $titles = array ();
2007-09-17 12:37:51 +00:00
var $html_hashes = array ();
2007-08-21 09:54:49 +00:00
# Status flag to avoid invalid nesting.
var $in_anchor = false ;
2006-02-25 16:48:46 +00:00
2004-07-30 04:02:58 +00:00
2007-02-08 11:23:25 +00:00
function transform ( $text ) {
#
# Main function. Takes Markdown source in $text and returns the
# transformed text. The order in which other subs are called here is
# essential. Link and image substitutions need to happen before
# _EscapeSpecialCharsWithinTagAttributes(), so that any *'s or _'s in the <a>
# and <img> tags get encoded.
# NOTE(review): _EscapeSpecialCharsWithinTagAttributes() is not called
# anywhere in this function; the sentence above may be stale.
#
# Clear the per-document hashes (urls, titles, html_hashes). If we don't
# clear these, you get conflicts from other articles when generating a page
# which contains more than one article (e.g. an index page that shows the
# N most recent articles):
$this -> urls = array ();
$this -> titles = array ();
$this -> html_hashes = array ();
# Standardize line endings:
# DOS to Unix and Mac to Unix
2007-09-17 12:37:51 +00:00
$text = preg_replace ( '{\r\n?}' , " \n " , $text );
2007-02-08 11:23:25 +00:00
# Make sure $text ends with a couple of newlines:
$text .= " \n \n " ;
# Convert all tabs to spaces.
$text = $this -> detab ( $text );
# Turn block-level HTML blocks into hash entries
$text = $this -> hashHTMLBlocks ( $text );
# Strip any lines consisting only of spaces (tabs were converted above).
# This makes subsequent regexen easier to write, because we can
# match consecutive blank lines with /\n+/ instead of something
2007-08-21 09:54:49 +00:00
# contorted like /[ ]*\n+/ .
$text = preg_replace ( '/^[ ]+$/m' , '' , $text );
2007-02-08 11:23:25 +00:00
# Run document gamut methods. Each key of $document_gamut is a method
# name; the $priority value itself is not used inside this loop.
foreach ( $this -> document_gamut as $method => $priority ) {
$text = $this -> $method ( $text );
}
2004-07-30 04:02:58 +00:00
2007-02-08 11:23:25 +00:00
return $text . " \n " ;
}
var $document_gamut = array (
# Strip link definitions, store in hashes.
" stripLinkDefinitions " => 20 ,
" runBasicBlockGamut " => 30 ,
);
2004-07-30 04:02:58 +00:00
2007-02-08 11:23:25 +00:00
function stripLinkDefinitions ( $text ) {
#
# Strips link definitions from $text and returns what is left. Every
# definition found is passed to _stripLinkDefinitions_callback(), which
# stores the URL and the optional title in $this->urls / $this->titles.
#
# A definition may be indented by at most tab_width - 1 spaces.
$less_than_tab = $this -> tab_width - 1 ;
# Link defs are in the form: ^[id]: url "optional title"
$text = preg_replace_callback ( ' {
^ [ ]{ 0 , '.$less_than_tab.' } \ [( .+ ) \ ][ ] ? : # id = $1
2007-08-21 09:54:49 +00:00
[ ] *
2007-02-08 11:23:25 +00:00
\n ? # maybe *one* newline
2007-08-21 09:54:49 +00:00
[ ] *
2007-02-08 11:23:25 +00:00
< ? ( \S + ? ) > ? # url = $2
2007-08-21 09:54:49 +00:00
[ ] *
2007-02-08 11:23:25 +00:00
\n ? # maybe one newline
2007-08-21 09:54:49 +00:00
[ ] *
2007-02-08 11:23:25 +00:00
( ? :
( ? <= \s ) # lookbehind for whitespace
[ " (]
( .* ? ) # title = $3
[ " )]
2007-08-21 09:54:49 +00:00
[ ] *
2007-02-08 11:23:25 +00:00
) ? # title is optional
( ? : \n +| \Z )
} xm ' ,
array ( & $this , '_stripLinkDefinitions_callback' ),
$text );
return $text ;
}
function _stripLinkDefinitions_callback($matches) {
#
# Records one link definition. $matches[1] is the id, $matches[2] the URL
# and $matches[3] the optional title. Returns the empty string, which
# replaces the definition in the text.
#
	# Ids are stored lower-cased.
	$link_id = strtolower($matches[1]);
	$this->urls[$link_id] = $this->encodeAmpsAndAngles($matches[2]);
	if (isset($matches[3])) {
		# Escape double quotes: the title is later placed inside a
		# double-quoted HTML attribute.
		$this->titles[$link_id] = str_replace('"', '&quot;', $matches[3]);
	}
	return ''; # String that will replace the block
}
2004-07-30 04:02:58 +00:00
2007-02-08 11:23:25 +00:00
function hashHTMLBlocks ( $text ) {
#
# Replaces each block-level HTML construct matched below with the value
# returned by _hashHTMLBlocks_callback() and returns the resulting text.
# Returns $text untouched when $this->no_markup is set.
#
2007-08-21 09:54:49 +00:00
if ( $this -> no_markup ) return $text ;
2007-09-17 12:37:51 +00:00
2007-02-08 11:23:25 +00:00
$less_than_tab = $this -> tab_width - 1 ;
# Hashify HTML blocks:
# We only want to do this for block-level HTML tags, such as headers,
# lists, and tables. That's because we still want to wrap <p>s around
# "paragraphs" that are wrapped in non-block-level tags, such as anchors,
# phrase emphasis, and spans. The list of tags we're looking for is
# hard-coded:
2007-09-17 12:37:51 +00:00
#
# * List "a" is made of tags which can be both inline or block-level.
# These will be treated block-level when the start tag is alone on
# its line, otherwise they're not matched here and will be taken as
# inline later.
# * List "b" is made of tags which are always block-level;
#
$block_tags_a = 'ins|del' ;
2007-02-08 11:23:25 +00:00
$block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|' .
'script|noscript|form|fieldset|iframe|math' ;
# Regular expression for the content of a block tag.
# The nesting fragment below is repeated $nested_tags_level times.
$nested_tags_level = 4 ;
$attr = '
( ?> # optional tag attributes
\s # starts with whitespace
( ?>
[ ^> " /]+ # text outside quotes
|
/+ ( ? !> ) # slash not followed by ">"
|
" [^ " ] * " # text inside double quotes (tolerate " > " )
|
\ ' [ ^ \ ' ] * \ ' # text inside single quotes (tolerate ">")
) *
) ?
' ;
$content =
str_repeat ( '
( ?>
[ ^< ] + # content without tag
|
< \2 # nested opening tag
'.$attr.' # attributes
2007-09-17 12:37:51 +00:00
( ?>
2007-02-08 11:23:25 +00:00
/>
|
> ' , $nested_tags_level ) . # end of opening tag
'.*?' . # last level nested tag content
str_repeat ( '
</ \2\s *> # closing nested tag
)
|
< ( ? !/ \2\s *> # other tags with a different name
)
) * ' ,
$nested_tags_level );
# Same pattern with back-reference 2 replaced by back-reference 3, for
# the group "a" alternative where the tag name is captured in $3.
2007-09-17 12:37:51 +00:00
$content2 = str_replace ( '\2' , '\3' , $content );
2007-02-08 11:23:25 +00:00
# First, look for nested blocks, e.g.:
# <div>
# <div>
# tags for inner block must be indented.
# </div>
# </div>
#
# The outermost tags must start at the left margin for this to match, and
# the inner nested divs must be indented.
# We need to do this before the next, more liberal match, because the next
# match will start at the first `<div>` and stop at the first `</div>`.
2007-09-17 12:37:51 +00:00
$text = preg_replace_callback ( ' {( ?>
( ?>
( ? <= \n\n ) # Starting after a blank line
| # or
\A\n ? # the beginning of the doc
)
( # save in $1
2004-07-30 04:02:58 +00:00
2007-09-17 12:37:51 +00:00
# Match from `\n<tag>` to `</tag>\n`, handling nested tags
# in between.
[ ]{ 0 , '.$less_than_tab.' }
2007-02-08 11:23:25 +00:00
< ( '.$block_tags_b.' ) # start tag = $2
2007-09-17 12:37:51 +00:00
'.$attr.' > # attributes followed by > and \n
2007-02-08 11:23:25 +00:00
'.$content.' # content, support nesting
</ \2 > # the matching end tag
2007-08-21 09:54:49 +00:00
[ ] * # trailing spaces/tabs
2007-02-08 11:23:25 +00:00
( ? = \n +| \Z ) # followed by a newline or end of document
2004-07-30 04:02:58 +00:00
2007-09-17 12:37:51 +00:00
| # Special version for tags of group a.
[ ]{ 0 , '.$less_than_tab.' }
< ( '.$block_tags_a.' ) # start tag = $3
'.$attr.' > [ ] * \n # attributes followed by >
'.$content2.' # content, support nesting
</ \3 > # the matching end tag
[ ] * # trailing spaces/tabs
( ? = \n +| \Z ) # followed by a newline or end of document
| # Special case just for <hr />. It was easier to make a special
# case than to make the other regex more complicated.
2007-02-08 11:23:25 +00:00
[ ]{ 0 , '.$less_than_tab.' }
< ( hr ) # start tag = $2
\b # word break
([ ^<> ]) * ? #
/ ?> # the matching end tag
2007-08-21 09:54:49 +00:00
[ ] *
2007-02-08 11:23:25 +00:00
( ? = \n { 2 ,} | \Z ) # followed by a blank line or end of document
2007-09-17 12:37:51 +00:00
| # Special case for standalone HTML comments:
2007-02-08 11:23:25 +00:00
[ ]{ 0 , '.$less_than_tab.' }
( ? s :
<!-- .* ? -->
)
2007-08-21 09:54:49 +00:00
[ ] *
2007-02-08 11:23:25 +00:00
( ? = \n { 2 ,} | \Z ) # followed by a blank line or end of document
2007-09-17 12:37:51 +00:00
| # PHP and ASP-style processor instructions (<? and <%)
2007-02-08 11:23:25 +00:00
[ ]{ 0 , '.$less_than_tab.' }
( ? s :
< ([ ? % ]) # $2
.* ?
\2 >
)
2007-08-21 09:54:49 +00:00
[ ] *
2007-02-08 11:23:25 +00:00
( ? = \n { 2 ,} | \Z ) # followed by a blank line or end of document
2007-09-17 12:37:51 +00:00
)
)} Sxmi ' ,
2007-02-08 11:23:25 +00:00
array ( & $this , '_hashHTMLBlocks_callback' ),
$text );
return $text ;
}
function _hashHTMLBlocks_callback($matches) {
	# $matches[1] is the HTML block captured by hashHTMLBlocks(). It is
	# stored through hashBlock() and the key returned for it is handed
	# back surrounded by newlines.
	$key = $this->hashBlock($matches[1]);
	return " \n \n $key\n\n ";
}
2007-09-17 12:37:51 +00:00
function hashPart($text, $boundary = 'X') {
#
# Called whenever a function inserts an atomic element in the text stream
# and that element must be hashed. Stores $text in $this->html_hashes under
# a freshly generated key and returns that key; the text is swapped back
# in when unhash is called.
#
# $boundary is the character used to surround the token. By convention,
# "B" is used for block elements that need not be wrapped into paragraph
# tags at the end, ":" is used for elements that are word separators and
# "S" is used for general span-level elements.
#
	# Function-static counter: incremented once per generated key.
	static $counter = 0;

	# Swap back any tag hash found in $text so we do not have to `unhash`
	# multiple times at the end.
	$restored = $this->unhash($text);

	# Then hash the block.
	$key = " $boundary\x1A " . ++$counter . $boundary;
	$this->html_hashes[$key] = $restored;

	# String that will replace the tag.
	return $key;
}
2006-02-25 16:48:46 +00:00
2004-07-30 04:02:58 +00:00
2007-09-17 12:37:51 +00:00
function hashBlock($text) {
#
# Block-level shortcut for hashPart(): hashes $text with 'B' as the
# boundary character and returns the key.
#
	$block_key = $this->hashPart($text, 'B');
	return $block_key;
}
var $block_gamut = array (
#
# These are all the transformations that form block-level
# tags like paragraphs, headers, and list items.
#
" doHeaders " => 10 ,
" doHorizontalRules " => 20 ,
2006-02-25 16:48:46 +00:00
2007-02-08 11:23:25 +00:00
" doLists " => 40 ,
" doCodeBlocks " => 50 ,
" doBlockQuotes " => 60 ,
);
function runBlockGamut($text) {
#
# Run block gamut transformations.
#
# Raw HTML in the Markdown source has to be hashed before anything else.
# This must happen for each block, not only once at the beginning, since
# hashed blocks can be part of list items and could have been indented.
# Indented blocks would have been seen as a code block in a previous pass
# of hashHTMLBlocks.
#
	$hashed = $this->hashHTMLBlocks($text);
	$result = $this->runBasicBlockGamut($hashed);
	return $result;
}
function runBasicBlockGamut($text) {
#
# Run block gamut transformations without hashing HTML blocks first. This
# is useful when HTML blocks are known to be already hashed, like in the
# first whole-document pass.
#
	# Each key of $block_gamut names a method of this object; call them in
	# the order the array is iterated.
	foreach (array_keys($this->block_gamut) as $step) {
		$text = $this->$step($text);
	}

	# Finally form paragraphs and restore hashed blocks.
	return $this->formParagraphs($text);
}
function doHorizontalRules ( $text ) {
# Do Horizontal Rules:
# A line holding the same marker character three or more times, with up to
# two spaces between markers, is replaced by a hashed <hr> element built
# with $this->empty_element_suffix. Returns the resulting text.
return preg_replace (
2007-09-17 12:37:51 +00:00
' {
^ [ ]{ 0 , 3 } # Leading space
([ *- _ ]) # $1: First marker
( ?> # Repeated marker group
[ ]{ 0 , 2 } # Zero, one, or two spaces.
\1 # Marker character
){ 2 ,} # Group repeated at least twice
[ ] * # Tailing spaces
$ # End of line.
} mx ' ,
2007-02-08 11:23:25 +00:00
" \n " . $this -> hashBlock ( " <hr $this->empty_element_suffix " ) . " \n " ,
$text );
}
var $span_gamut = array (
#
# These are all the transformations that occur *within* block-level
# tags like paragraphs, headers, and list items.
#
2007-09-17 12:37:51 +00:00
# Process character escapes, code spans, and inline HTML
# in one shot.
" parseSpan " => - 30 ,
2007-02-08 11:23:25 +00:00
# Process anchor and image tags. Images must come first,
# because ![foo][f] looks like an anchor.
" doImages " => 10 ,
" doAnchors " => 20 ,
2006-02-25 16:48:46 +00:00
2007-02-08 11:23:25 +00:00
# Make links out of things like `<http://example.com/>`
# Must come after doAnchors, because you can use < and >
# delimiters in inline links like [this](<url>).
" doAutoLinks " => 30 ,
" encodeAmpsAndAngles " => 40 ,
" doItalicsAndBold " => 50 ,
" doHardBreaks " => 60 ,
);
function runSpanGamut($text) {
#
# Run span gamut transformations: every key of $span_gamut names a method
# of this object, and each one is applied to the text in iteration order.
#
	foreach (array_keys($this->span_gamut) as $step) {
		$text = $this->$step($text);
	}
	return $text;
}
function doHardBreaks($text) {
	# Do hard breaks: two or more spaces followed by a newline are replaced
	# by whatever _doHardBreaks_callback() returns.
	$on_break = array(&$this, '_doHardBreaks_callback');
	return preg_replace_callback('/ {2,}\n/', $on_break, $text);
}
2007-09-17 12:37:51 +00:00
function _doHardBreaks_callback($matches) {
	# Build the line-break element with the configured empty-element suffix
	# and return its hash key; $matches itself is not used.
	$break_tag = " <br $this->empty_element_suffix\n ";
	return $this->hashPart($break_tag);
}
function doAnchors ( $text ) {
#
# Turn Markdown link shortcuts into XHTML <a> tags.
#
# Returns $text untouched when $this->in_anchor is already set; otherwise
# the flag is raised for the duration of this call and cleared before
# returning. Reference-style links are processed first, then inline links.
# The reference-style shortcut pass at the end is commented out in this copy.
#
2007-08-21 09:54:49 +00:00
if ( $this -> in_anchor ) return $text ;
$this -> in_anchor = true ;
2006-02-25 16:48:46 +00:00
#
2007-02-08 11:23:25 +00:00
# First, handle reference-style links: [link text] [id]
2006-02-25 16:48:46 +00:00
#
2007-02-08 11:23:25 +00:00
$text = preg_replace_callback ( ' {
( # wrap whole match in $1
\ [
( '.$this->nested_brackets.' ) # link text = $2
\ ]
[ ] ? # one optional space
( ? : \n [ ] * ) ? # one optional newline followed by spaces
\ [
( .* ? ) # id = $3
\ ]
2006-02-25 16:48:46 +00:00
)
2007-02-08 11:23:25 +00:00
} xs ' ,
array ( & $this , '_doAnchors_reference_callback' ), $text );
2006-02-25 16:48:46 +00:00
#
2007-02-08 11:23:25 +00:00
# Next, inline-style links: [link text](url "optional title")
2006-02-25 16:48:46 +00:00
#
2007-02-08 11:23:25 +00:00
$text = preg_replace_callback ( ' {
( # wrap whole match in $1
\ [
( '.$this->nested_brackets.' ) # link text = $2
\ ]
\ ( # literal paren
2007-08-21 09:54:49 +00:00
[ ] *
( ? :
< ( \S * ) > # href = $3
|
( '.$this->nested_url_parenthesis.' ) # href = $4
)
[ ] *
( # $5
([ \ ' " ]) # quote char = $ 6
( .* ? ) # Title = $7
\6 # matching quote
[ ] * # ignore any spaces/tabs between closing quote and )
2007-02-08 11:23:25 +00:00
) ? # title is optional
\ )
)
} xs ' ,
array ( & $this , '_DoAnchors_inline_callback' ), $text );
2006-02-25 16:48:46 +00:00
#
2007-02-08 11:23:25 +00:00
# Last, handle reference-style shortcuts: [link text]
# These must come last in case you've also got [link test][1]
# or [link test](/foo)
2006-02-25 16:48:46 +00:00
#
2007-02-08 11:23:25 +00:00
// $text = preg_replace_callback('{
// ( # wrap whole match in $1
// \[
// ([^\[\]]+) # link text = $2; can\'t contain [ or ]
// \]
// )
// }xs',
// array(&$this, '_doAnchors_reference_callback'), $text);
2007-08-21 09:54:49 +00:00
$this -> in_anchor = false ;
2007-02-08 11:23:25 +00:00
return $text ;
}
function _doAnchors_reference_callback($matches) {
#
# Builds the <a> element for one reference-style link.
# $matches[1] is the whole match, $matches[2] the link text and
# $matches[3] the link id. Returns the hash key of the generated element,
# or the whole match untouched when the id has no stored URL.
#
	$whole_match = $matches[1];
	$link_text   = $matches[2];
	$link_id     =& $matches[3];

	if ($link_id == "") {
		# for shortcut links like [this][] or [this].
		$link_id = $link_text;
	}

	# lower-case and turn embedded newlines into spaces
	$link_id = strtolower($link_id);
	$link_id = preg_replace('{[ ]?\n}', ' ', $link_id);

	if (isset($this->urls[$link_id])) {
		$url = $this->urls[$link_id];
		$url = $this->encodeAmpsAndAngles($url);

		$result = "<a href=\"$url\"";
		if (isset($this->titles[$link_id])) {
			$title = $this->titles[$link_id];
			$title = $this->encodeAmpsAndAngles($title);
			$result .= " title=\"$title\"";
		}

		$link_text = $this->runSpanGamut($link_text);
		$result .= ">$link_text</a>";
		$result = $this->hashPart($result);
	}
	else {
		# No URL stored for this id: leave the source text intact.
		$result = $whole_match;
	}
	return $result;
}
function _doAnchors_inline_callback($matches) {
#
# Builds the <a> element for one inline link and returns its hash key.
# $matches[2] is the link text, $matches[3] the URL written between angle
# brackets, $matches[4] the bare URL and $matches[7] the optional title.
#
	$link_text = $this->runSpanGamut($matches[2]);
	# Use the bare URL when the <...> form did not capture anything.
	$url       = $matches[3] == '' ? $matches[4] : $matches[3];
	$title     =& $matches[7];

	$url = $this->encodeAmpsAndAngles($url);

	$result = "<a href=\"$url\"";
	if (isset($title)) {
		# Escape double quotes so the title cannot end the attribute early.
		$title = str_replace('"', '&quot;', $title);
		$title = $this->encodeAmpsAndAngles($title);
		$result .= " title=\"$title\"";
	}

	# NOTE(review): the span gamut is applied to the link text a second
	# time here, as in the original code; kept to preserve behaviour.
	$link_text = $this->runSpanGamut($link_text);
	$result .= ">$link_text</a>";

	return $this->hashPart($result);
}
function doImages ( $text ) {
2006-02-25 16:48:46 +00:00
#
2007-02-08 11:23:25 +00:00
# Turn Markdown image shortcuts into <img> tags.
# Reference-style images are processed first, then inline images; every
# match is handed to the corresponding _doImages_*_callback(). Returns the
# resulting text.
2006-02-25 16:48:46 +00:00
#
#
2007-02-08 11:23:25 +00:00
# First, handle reference-style labeled images: ![alt text][id]
2006-02-25 16:48:46 +00:00
#
2007-02-08 11:23:25 +00:00
$text = preg_replace_callback ( ' {
( # wrap whole match in $1
! \ [
( '.$this->nested_brackets.' ) # alt text = $2
\ ]
[ ] ? # one optional space
( ? : \n [ ] * ) ? # one optional newline followed by spaces
\ [
( .* ? ) # id = $3
\ ]
)
} xs ' ,
array ( & $this , '_doImages_reference_callback' ), $text );
2006-02-25 16:48:46 +00:00
#
2007-02-08 11:23:25 +00:00
# Next, handle inline images: ![alt text](url "optional title")
# Don't forget: encode * and _
2006-02-25 16:48:46 +00:00
#
2007-02-08 11:23:25 +00:00
$text = preg_replace_callback ( ' {
( # wrap whole match in $1
! \ [
( '.$this->nested_brackets.' ) # alt text = $2
\ ]
\s ? # One optional whitespace character
\ ( # literal paren
2007-08-21 09:54:49 +00:00
[ ] *
( ? :
< ( \S * ) > # src url = $3
|
( '.$this->nested_url_parenthesis.' ) # src url = $4
)
[ ] *
( # $5
([ \ ' " ]) # quote char = $ 6
( .* ? ) # title = $7
\6 # matching quote
[ ] *
2007-02-08 11:23:25 +00:00
) ? # title is optional
\ )
)
} xs ' ,
array ( & $this , '_doImages_inline_callback' ), $text );
return $text ;
}
function _doImages_reference_callback($matches) {
#
# Builds the <img> element for one reference-style image.
# $matches[1] is the whole match, $matches[2] the alt text and
# $matches[3] the link id. Returns the hash key of the generated element,
# or the whole match untouched when the id has no stored URL.
#
	$whole_match = $matches[1];
	$alt_text    = $matches[2];
	$link_id     = strtolower($matches[3]);

	if ($link_id == "") {
		$link_id = strtolower($alt_text); # for shortcut links like ![this][].
	}

	# Escape double quotes: the alt text goes inside a double-quoted
	# attribute.
	$alt_text = str_replace('"', '&quot;', $alt_text);
	if (isset($this->urls[$link_id])) {
		$url = $this->urls[$link_id];
		$result = "<img src=\"$url\" alt=\"$alt_text\"";
		if (isset($this->titles[$link_id])) {
			$title = $this->titles[$link_id];
			$result .= " title=\"$title\"";
		}
		$result .= $this->empty_element_suffix;
		$result = $this->hashPart($result);
	}
	else {
		# If there's no such link ID, leave intact:
		$result = $whole_match;
	}

	return $result;
}
function _doImages_inline_callback($matches) {
#
# Builds the <img> element for one inline image and returns its hash key.
# $matches[2] is the alt text, $matches[3] the URL written between angle
# brackets, $matches[4] the bare URL and $matches[7] the optional title.
#
	$alt_text = $matches[2];
	# Use the bare URL when the <...> form did not capture anything.
	$url      = $matches[3] == '' ? $matches[4] : $matches[3];
	$title    =& $matches[7];

	# Escape double quotes so neither attribute can be ended early.
	$alt_text = str_replace('"', '&quot;', $alt_text);
	$result = "<img src=\"$url\" alt=\"$alt_text\"";
	if (isset($title)) {
		$title = str_replace('"', '&quot;', $title);
		$result .= " title=\"$title\""; # $title already quoted
	}
	$result .= $this->empty_element_suffix;

	return $this->hashPart($result);
}
function doHeaders ( $text ) {
# Converts both header syntaxes and returns the resulting text. Setext
# matches go to _doHeaders_callback_setext(), atx matches to
# _doHeaders_callback_atx().
#
# Setext-style headers:
# Header 1
# ========
#
# Header 2
# --------
#
2007-09-17 12:37:51 +00:00
$text = preg_replace_callback ( '{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx' ,
array ( & $this , '_doHeaders_callback_setext' ), $text );
2007-02-08 11:23:25 +00:00
# atx-style headers:
# # Header 1
# ## Header 2
# ## Header 2 with closing hashes ##
# ...
# ###### Header 6
#
$text = preg_replace_callback ( ' {
^ ( \ #{1,6}) # $1 = string of #\'s
2007-08-21 09:54:49 +00:00
[ ] *
2007-02-08 11:23:25 +00:00
( .+ ? ) # $2 = Header text
2007-08-21 09:54:49 +00:00
[ ] *
2007-02-08 11:23:25 +00:00
\ #* # optional closing #\'s (not counted)
\n +
} xm ' ,
array ( & $this , '_doHeaders_callback_atx' ), $text );
return $text ;
}
2007-09-17 12:37:51 +00:00
function _doHeaders_callback_setext($matches) {
#
# Builds the header element for one Setext-style header. $matches[1] is
# the header text and $matches[2] the underline: "=" gives level 1, "-"
# gives level 2. Returns the hash key of the element, surrounded by
# newlines.
#
	# Square-bracket offset: the curly-brace form is not accepted by PHP 8.
	$level = $matches[2][0] == '=' ? 1 : 2;
	$block = "<h$level>" . $this->runSpanGamut($matches[1]) . "</h$level>";
	return "\n" . $this->hashBlock($block) . "\n\n";
}
function _doHeaders_callback_atx($matches) {
#
# Builds the header element for one atx-style header. The level is the
# number of "#" characters captured in $matches[1]; $matches[2] is the
# header text. Returns the hash key of the element, surrounded by newlines.
#
	$level = strlen($matches[1]);
	$block = "<h$level>" . $this->runSpanGamut($matches[2]) . "</h$level>";
	return "\n" . $this->hashBlock($block) . "\n\n";
}
function doLists ( $text ) {
#
# Form HTML ordered (numbered) and unordered (bulleted) lists.
#
# The text is scanned once per marker kind (bullets, then numbers) and
# every whole list found is handed to _doLists_callback(). While
# $this->list_level is non-zero a list may begin at any line start;
# otherwise it must follow a blank line or the start of the text.
#
$less_than_tab = $this -> tab_width - 1 ;
# Re-usable patterns to match list item bullets and number markers:
$marker_ul = '[*+-]' ;
$marker_ol = '\d+[.]' ;
$marker_any = " (?: $marker_ul | $marker_ol ) " ;
$markers = array ( $marker_ul , $marker_ol );
foreach ( $markers as $marker ) {
# Re-usable pattern to match any entire ul or ol list:
$whole_list = '
( # $1 = whole list
( # $2
[ ]{ 0 , '.$less_than_tab.' }
( '.$marker.' ) # $3 = first list item marker
2007-08-21 09:54:49 +00:00
[ ] +
2007-02-08 11:23:25 +00:00
)
( ? s :.+ ? )
( # $4
\z
|
\n { 2 ,}
( ? = \S )
( ? ! # Negative lookahead for another list item marker
2007-08-21 09:54:49 +00:00
[ ] *
'.$marker.' [ ] +
2007-02-08 11:23:25 +00:00
)
)
)
' ; // mx
# We use a different prefix before nested lists than top-level lists.
# See extended comment in processListItems().
2006-02-25 16:48:46 +00:00
2007-02-08 11:23:25 +00:00
if ( $this -> list_level ) {
$text = preg_replace_callback ( ' {
^
'.$whole_list.'
} mx ' ,
array ( & $this , '_doLists_callback' ), $text );
}
else {
$text = preg_replace_callback ( ' {
( ? : ( ? <= \n ) \n | \A\n ? ) # Must eat the newline
'.$whole_list.'
} mx ' ,
array ( & $this , '_doLists_callback' ), $text );
}
}
return $text ;
}
function _doLists_callback($matches) {
	#
	# preg callback for doLists: turn one whole list into <ul> or <ol>.
	#
	# $matches[1] is the whole list text and $matches[3] its first item
	# marker. A marker matching the bullet pattern makes the list a <ul>,
	# otherwise it is an <ol>; only markers of that same family are then
	# used to split the list into items.
	#
	# Returns the hashed list block surrounded by newlines.
	#
	# Re-usable patterns to match list item bullets and number markers:
	$marker_ul = '[*+-]';
	$marker_ol = '\d+[.]';

	$list = $matches[1] . "\n";

	if (preg_match('/' . $marker_ul . '/', $matches[3])) {
		$list_type = "ul";
		$marker_any = $marker_ul;
	}
	else {
		$list_type = "ol";
		$marker_any = $marker_ol;
	}

	$result = $this->processListItems($list, $marker_any);
	$result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");

	return "\n" . $result . "\n\n";
}
var $list_level = 0 ;
function processListItems($list_str, $marker_any) {
	#
	# Process the contents of a single ordered or unordered list, splitting it
	# into individual list items.
	#
	# Params:
	#    $list_str   - text of the whole list
	#    $marker_any - regex fragment matching the item markers of this list
	#
	# Returns the list text with every item replaced by the result of
	# _processListItems_callback.
	#
	# The $this->list_level property keeps track of when we're inside a list.
	# Each time we enter a list, we increment it; when we leave a list,
	# we decrement. If it's zero, we're not in a list anymore.
	#
	# We do this because when we're not inside a list, we want to treat
	# something like this:
	#
	#		I recommend upgrading to version
	#		8. Oops, now this line is treated
	#		as a sub-list.
	#
	# As a single paragraph, despite the fact that the second line starts
	# with a digit-period-space sequence.
	#
	# Whereas when we're inside a list (or sub-list), that line will be
	# treated as the start of a sub-list. What a kludge, huh? This is
	# an aspect of Markdown's syntax that's hard to parse perfectly
	# without resorting to mind-reading. Perhaps the solution is to
	# change the syntax rules such that sub-lists must start with a
	# starting cardinal number; e.g. "1." or "a.".
	#
	$this->list_level++;

	# Trim trailing blank lines:
	$list_str = preg_replace('/\n{2,}\z/', "\n", $list_str);

	$list_str = preg_replace_callback('{
		(\n)?							# leading line = $1
		(^[ ]*)							# leading whitespace = $2
		('.$marker_any.') [ ]+			# list marker = $3
		((?s:.+?))						# list item text = $4
		(?:(\n+(?=\n))|\n)				# trailing blank line = $5
		(?= \n* (\z | \2 ('.$marker_any.') [ ]+))
		}xm',
		array(&$this, '_processListItems_callback'), $list_str);

	$this->list_level--;
	return $list_str;
}
function _processListItems_callback($matches) {
	#
	# preg callback for processListItems: build one <li> element.
	#
	# $matches[4] is the item text, $matches[1] the optional leading blank
	# line and $matches[5] the optional trailing blank line. An item that
	# has either blank line, or contains a blank line itself, is run through
	# the block gamut; any other item is checked for sub-lists and then run
	# through the span gamut.
	#
	$item = $matches[4];

	# Optional groups may be missing from $matches, hence !empty().
	$is_block_item = !empty($matches[1]) || !empty($matches[5]) ||
		preg_match('/\n{2,}/', $item);

	if ($is_block_item) {
		$item = $this->runBlockGamut($this->outdent($item) . "\n");
	}
	else {
		# Recursion for sub-lists:
		$item = $this->doLists($this->outdent($item));
		$item = preg_replace('/\n+$/', '', $item);
		$item = $this->runSpanGamut($item);
	}

	return "<li>" . $item . "</li>\n";
}
2006-02-25 16:48:46 +00:00
2004-07-30 04:02:58 +00:00
2007-02-08 11:23:25 +00:00
function doCodeBlocks($text) {
	#
	# Process Markdown `<pre><code>` blocks.
	#
	# A code block is one or more consecutive lines that start with at least
	# $this->tab_width spaces. It must begin at the start of the text or after
	# a blank line. Each block is handed to _doCodeBlocks_callback.
	#
	$text = preg_replace_callback('{
			(?:\n\n|\A)
			(	            # $1 = the code block -- one or more indented lines
			  (?>
				[ ]{'.$this->tab_width.'}  # Lines must start with a tab-width of spaces
				.*\n+
			  )+
			)
			((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z)	# Lookahead for non-space at line-start, or end of doc
		}xm',
		array(&$this, '_doCodeBlocks_callback'), $text);

	return $text;
}
function _doCodeBlocks_callback($matches) {
	#
	# preg callback for doCodeBlocks: render one code block.
	#
	# $matches[1] is the indented block. It is outdented one level,
	# HTML-escaped (quotes left alone), stripped of leading and trailing
	# newlines, wrapped in <pre><code> and hashed.
	#
	$codeblock = $this->outdent($matches[1]);
	$codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);

	# Trim leading newlines and trailing newlines:
	$codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock);

	$codeblock = "<pre><code>$codeblock\n</code></pre>";
	return "\n\n" . $this->hashBlock($codeblock) . "\n\n";
}
2004-07-30 04:02:58 +00:00
2007-09-17 12:37:51 +00:00
function makeCodeSpan($code) {
	#
	# Create a code span markup for $code. Called from handleSpanToken.
	#
	# The code is trimmed and HTML-escaped (quotes left alone) before being
	# wrapped in <code>; the hashed result is returned.
	#
	$code = htmlspecialchars(trim($code), ENT_NOQUOTES);
	return $this->hashPart("<code>$code</code>");
}
2004-07-30 04:02:58 +00:00
2007-02-08 11:23:25 +00:00
function doItalicsAndBold($text) {
	#
	# Convert `**strong**` / `__strong__` and `*em*` / `_em_` markers.
	#
	# Two passes are made: strong emphasis first, then regular emphasis.
	# Each match is delegated to the corresponding callback.
	#
	# <strong> must go first:
	$text = preg_replace_callback('{
			(						# $1: Marker
				(?<!\*\*) \* |		#     (not preceded by two chars of
				(?<!__)   _			#      the same marker)
			)
			\1
			(?=\S) 					# Not followed by whitespace
			(?!\1\1)				#   or two others marker chars.
			(						# $2: Content
				(?>
					[^*_]+?			# Anything not em markers.
				|
									# Balance any regular emphasis inside.
					\1 (?=\S) .+? (?<=\S) \1
				|
					.				# Allow unbalanced * and _.
				)+?
			)
			(?<=\S) \1\1			# End mark not preceded by whitespace.
		}sx',
		array(&$this, '_doItalicAndBold_strong_callback'), $text);

	# Then <em>:
	$text = preg_replace_callback(
		'{ ( (?<!\*)\* | (?<!_)_ ) (?=\S) (?! \1) (.+?) (?<=\S)(?<!\s(?=\1).) \1 }sx',
		array(&$this, '_doItalicAndBold_em_callback'), $text);

	return $text;
}
function _doItalicAndBold_em_callback($matches) {
	#
	# preg callback for doItalicsAndBold: wrap the emphasised text
	# ($matches[2], after the span gamut) in <em> and hash the result.
	#
	$text = $this->runSpanGamut($matches[2]);
	return $this->hashPart("<em>$text</em>");
}
function _doItalicAndBold_strong_callback($matches) {
	#
	# preg callback for doItalicsAndBold: wrap the strongly emphasised text
	# ($matches[2], after the span gamut) in <strong> and hash the result.
	#
	$text = $this->runSpanGamut($matches[2]);
	return $this->hashPart("<strong>$text</strong>");
}
2007-02-08 11:23:25 +00:00
function doBlockQuotes($text) {
	#
	# Find email-style blockquotes (runs of lines introduced by ">") and
	# hand each whole quote to _doBlockQuotes_callback.
	#
	$text = preg_replace_callback('/
		  (								# Wrap whole match in $1
			(?>
			  ^[ ]*>[ ]?			# ">" at the start of a line
				.+\n					# rest of the first line
			  (.+\n)*					# subsequent consecutive lines
			  \n*						# blanks
			)+
		  )
		/xm',
		array(&$this, '_doBlockQuotes_callback'), $text);

	return $text;
}
function _doBlockQuotes_callback($matches) {
	#
	# preg callback for doBlockQuotes: render one blockquote.
	#
	# $matches[1] is the whole quoted text. One level of quoting is removed,
	# the content is run through the block gamut (which handles nested
	# quotes), and the result is wrapped in <blockquote> and hashed.
	#
	$bq = $matches[1];

	# Trim one level of quoting - trim whitespace-only lines:
	$bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq);
	$bq = $this->runBlockGamut($bq);		# recurse

	# Indent the content of the quote.
	$bq = preg_replace('/^/m', " ", $bq);

	# These leading spaces cause problem with <pre> content,
	# so we need to fix that:
	$bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
		array(&$this, '_doBlockQuotes_callback2'), $bq);

	return "\n" . $this->hashBlock("<blockquote>\n$bq\n</blockquote>") . "\n\n";
}
function _doBlockQuotes_callback2($matches) {
	#
	# preg callback used on <pre> blocks inside a blockquote: remove the
	# leading space found at the start of each line of $matches[1], so that
	# preformatted content is not shifted by the blockquote indentation.
	#
	return preg_replace('/^ /m', '', $matches[1]);
}
function formParagraphs($text) {
	#
	# Wrap paragraphs in <p> tags and swap hashed blocks back in.
	#
	# Params:
	#    $text - string to process with html <p> tags
	#
	# Returns the paragraphs and blocks joined by blank lines.
	#
	# Strip leading and trailing lines:
	$text = preg_replace('/\A\n+|\n+\z/', '', $text);

	$grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

	#
	# Wrap <p> tags and unhashify HTML blocks
	#
	foreach ($grafs as $key => $value) {
		if (!preg_match('/^B\x1A[0-9]+B$/', $value)) {
			# Is a paragraph: apply span-level formatting, then replace any
			# leading spaces with the opening tag.
			$value = $this->runSpanGamut($value);
			$value = preg_replace('/^([ ]*)/', "<p>", $value);
			$value .= "</p>";
			$grafs[$key] = $this->unhash($value);
		}
		else {
			# Is a block: the whole chunk is a block hash token, so put the
			# stored block back in its place.
			$grafs[$key] = $this->html_hashes[$value];
		}
	}

	return implode("\n\n", $grafs);
}
2007-02-08 11:23:25 +00:00
function encodeAmpsAndAngles($text) {
	#
	# Smart processing for ampersands and angle brackets that need to be
	# encoded.
	#
	# When $this->no_entities is set, every `&` and `<` is encoded.
	# Otherwise an `&` that already starts an entity and a `<` that starts
	# a tag are left alone.
	#
	if ($this->no_entities) {
		# Ampersands first, so the `&` of `&lt;` is not encoded again.
		$text = str_replace('&', '&amp;', $text);
		$text = str_replace('<', '&lt;', $text);
		return $text;
	}

	# Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
	#   http://bumppo.net/projects/amputator/
	$text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
		'&amp;', $text);

	# Encode naked <'s
	$text = preg_replace('{<(?![a-z/?\$!%])}i', '&lt;', $text);

	return $text;
}
2007-02-08 11:23:25 +00:00
function doAutoLinks($text) {
	#
	# Turn `<http://example.com/>` and `<address@domain.foo>` into links.
	#
	# URLs with the http, https, ftp or dict scheme go through
	# _doAutoLinks_url_callback; email addresses (with an optional `mailto:`
	# prefix) go through _doAutoLinks_email_callback.
	#
	$text = preg_replace_callback('{<((https?|ftp|dict):[^\'">\s]+)>}',
		array(&$this, '_doAutoLinks_url_callback'), $text);

	# Email addresses: <address@domain.foo>
	$text = preg_replace_callback('{
		<
		(?:mailto:)?
		(
			[-.\w\x80-\xFF]+
			\@
			[-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
		)
		>
		}xi',
		array(&$this, '_doAutoLinks_email_callback'), $text);

	return $text;
}
function _doAutoLinks_url_callback($matches) {
	#
	# preg callback for doAutoLinks: build the anchor for one URL.
	#
	# $matches[1] is the URL; it is encoded with encodeAmpsAndAngles and
	# used both as href and as link text. The hashed anchor is returned.
	#
	$url = $this->encodeAmpsAndAngles($matches[1]);
	$link = "<a href=\"$url\">$url</a>";
	return $this->hashPart($link);
}
function _doAutoLinks_email_callback($matches) {
	#
	# preg callback for doAutoLinks: build the obfuscated mailto link for
	# the address in $matches[1] and return it hashed.
	#
	$link = $this->encodeEmailAddress($matches[1]);
	return $this->hashPart($link);
}
function encodeEmailAddress($addr) {
	#
	# Input: an email address, e.g. "foo@example.com"
	#
	# Output: the email address as a mailto link, with each character
	#         of the address encoded as either a decimal or hex entity, in
	#         the hopes of foiling most address harvesting spam bots.
	#         Some characters are left as-is; "@" is always encoded.
	#
	# Based by a filter by Matthew Wickline, posted to BBEdit-Talk.
	# With some optimizations by Milian Wolff.
	#
	$prefix = "mailto:";
	$addr = $prefix . $addr;
	$chars = preg_split('/(?<!^)(?!$)/', $addr);
	$seed = (int)abs(crc32($addr) / strlen($addr)); # Deterministic seed.

	foreach ($chars as $key => $char) {
		$ord = ord($char);
		# Ignore non-ascii chars.
		if ($ord < 128) {
			$r = ($seed * (1 + $key)) % 100; # Pseudo-random function.
			# roughly 10% raw, 45% hex, 45% dec
			# '@' *must* be encoded. I insist.
			if ($r > 90 && $char != '@') {
				continue; # leave the character as-is
			}
			if ($r < 45) {
				$chars[$key] = '&#x' . dechex($ord) . ';';
			}
			else {
				$chars[$key] = '&#' . $ord . ';';
			}
		}
	}

	$addr = implode('', $chars);
	# Link text: the encoded address without the `mailto:` prefix.
	$text = implode('', array_slice($chars, strlen($prefix)));

	return "<a href=\"$addr\">$text</a>";
}
2007-09-17 12:37:51 +00:00
function parseSpan($str) {
	#
	# Take the string $str and parse it into tokens, hashing embedded HTML,
	# escaped characters and handling code spans.
	#
	# Returns the text with every token replaced by what handleSpanToken
	# returned for it.
	#
	$output = '';

	# Alternatives matching inline markup; left out when markup is disabled.
	$markup_alternatives = $this->no_markup ? '' : '
			|
				<!--    .*?     -->		# comment
			|
				<\?.*?\?> | <%.*?%>		# processing instruction
			|
				<[/!$]?[-a-zA-Z0-9:]+	# regular tags
				(?>
					\s
					(?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
				)?
				>
		';

	$regex = '{
			(
				\\\\['.preg_quote($this->escape_chars).']
			|
				(?<![`\\\\])
				`+						# code span marker
		'.$markup_alternatives.'
			)
			}xs';

	while (true) {
		#
		# Each loop iteration searches for either the next tag, the next
		# opening code span marker, or the next escaped character.
		# Each token is then passed to handleSpanToken.
		#
		$parts = preg_split($regex, $str, 2, PREG_SPLIT_DELIM_CAPTURE);

		# Keep the text preceding the token.
		if ($parts[0] != "") {
			$output .= $parts[0];
		}

		# No token found: the end has been reached.
		if (!isset($parts[1])) {
			break;
		}

		# handleSpanToken receives the remaining text by reference and may
		# consume part of it (the body of a code span).
		$output .= $this->handleSpanToken($parts[1], $parts[2]);
		$str = $parts[2];
	}

	return $output;
}
function handleSpanToken($token, &$str) {
	#
	# Handle $token provided by parseSpan by determining its nature and
	# returning the corresponding value that should replace it.
	#
	# $str is the text following the token. It is passed by reference: when
	# the token opens a code span, the span body and its closing marker are
	# removed from $str.
	#
	# Bracket offsets: the `$token{0}` form was removed in PHP 8.
	switch ($token[0]) {
		case "\\":
			# Backslash escape: emit the escaped character as an entity.
			return $this->hashPart("&#" . ord($token[1]) . ";");
		case "`":
			# Search for end marker in remaining text.
			if (preg_match('/^(.*?[^`])' . $token . '(?!`)(.*)$/sm',
				$str, $matches))
			{
				$str = $matches[2];
				$codespan = $this->makeCodeSpan($matches[1]);
				return $this->hashPart($codespan);
			}
			return $token; // return as text since no ending marker found.
		default:
			return $this->hashPart($token);
	}
}
2004-07-30 04:02:58 +00:00
2007-02-08 11:23:25 +00:00
function outdent($text) {
	#
	# Remove one level of line-leading tabs or spaces: at the start of every
	# line, either one tab or up to $this->tab_width spaces.
	#
	return preg_replace('/^(\t|[ ]{1,' . $this->tab_width . '})/m', '', $text);
}
2004-07-30 04:02:58 +00:00
2007-02-08 11:23:25 +00:00
# String length function for detab. `_initDetab` will create a function to
# handle UTF-8 if the default function does not exist.
var $utf8_strlen = 'mb_strlen' ;
function detab($text) {
	#
	# Replace tabs with the appropriate amount of space.
	#
	# For each line we separate the line in blocks delimited by
	# tab characters. Then we reconstruct every line by adding the
	# appropriate number of space between each blocks.
	#
	# Only lines that contain a tab are passed to _detab_callback.
	#
	return preg_replace_callback('/^.*\t.*$/m',
		array(&$this, '_detab_callback'), $text);
}
function _detab_callback($matches) {
	#
	# preg callback for detab: expand the tabs of one line ($matches[0]).
	#
	# Each tab is replaced by the number of spaces needed to reach the next
	# multiple of $this->tab_width, measured with the string length function
	# stored in $this->utf8_strlen.
	#
	$strlen = $this->utf8_strlen; # strlen function for UTF-8.

	# Split in blocks.
	$blocks = explode("\t", $matches[0]);

	# The first block starts the line; the others are appended after padding.
	$line = array_shift($blocks);
	foreach ($blocks as $block) {
		# Calculate amount of space, insert spaces, insert block.
		$amount = $this->tab_width -
			$strlen($line, 'UTF-8') % $this->tab_width;
		$line .= str_repeat(" ", $amount) . $block;
	}
	return $line;
}
2007-02-08 11:23:25 +00:00
function _initDetab() {
	#
	# Check for the availability of the function in the `utf8_strlen` property
	# (initially `mb_strlen`). If the function is not available, install a
	# function that will loosely count the number of UTF-8 characters with a
	# regular expression.
	#
	# Nothing to do if a fallback is already installed or the named
	# function exists.
	if (!is_string($this->utf8_strlen) ||
		function_exists($this->utf8_strlen)) return;

	# An anonymous function replaces create_function(), which was removed in
	# PHP 8. Extra arguments passed by the caller are ignored.
	$this->utf8_strlen = function ($text) {
		return preg_match_all(
			'/[\x00-\xBF]|[\xC0-\xFF][\x80-\xBF]*/',
			$text, $m);
	};
}
2007-02-08 11:23:25 +00:00
function unhash($text) {
	#
	# Swap back in all the tags hashed by _HashHTMLBlocks.
	#
	# A hash token is a boundary character, \x1A, a run of digits and the
	# same boundary character again; each token found in $text is replaced
	# through _unhash_callback.
	#
	return preg_replace_callback('/(.)\x1A[0-9]+\1/',
		array(&$this, '_unhash_callback'), $text);
}
function _unhash_callback($matches) {
	#
	# preg callback for unhash: return the text stored in $this->html_hashes
	# for the token $matches[0]. Text that merely looks like a token but was
	# never stored is returned unchanged instead of reading an undefined key.
	#
	$token = $matches[0];
	return isset($this->html_hashes[$token])
		? $this->html_hashes[$token]
		: $token;
}
2004-07-30 04:02:58 +00:00
}
2009-05-26 02:46:09 +00:00
/**
* Markdown Extra Parser Class
*
* @ package moodlecore
* @ copyright ( c ) 2004 - 2006 John Gruber
*/
2007-02-08 11:23:25 +00:00
class MarkdownExtra_Parser extends Markdown_Parser {
# Prefix for footnote ids.
var $fn_id_prefix = " " ;
# Optional title attribute for footnote links and backlinks.
var $fn_link_title = MARKDOWN_FN_LINK_TITLE ;
var $fn_backlink_title = MARKDOWN_FN_BACKLINK_TITLE ;
# Optional class attribute for footnote links and backlinks.
var $fn_link_class = MARKDOWN_FN_LINK_CLASS ;
var $fn_backlink_class = MARKDOWN_FN_BACKLINK_CLASS ;
function __construct() {
	#
	# Constructor entry point. PHP 8 no longer treats a method named after
	# the class as a constructor, so delegate to it explicitly.
	#
	$this->MarkdownExtra_Parser();
}
function MarkdownExtra_Parser() {
	#
	# Constructor function. Initialize the parser object.
	#
	# Add extra escapable characters before parent constructor
	# initialize the table.
	$this->escape_chars .= ':|';

	# Insert extra document, block, and span transformations.
	# Parent constructor will do the sorting.
	$this->document_gamut += array(
		"stripFootnotes"     => 15,
		"stripAbbreviations" => 25,
		"appendFootnotes"    => 50,
		);
	$this->block_gamut += array(
		"doTables"           => 15,
		"doDefLists"         => 45,
		);
	$this->span_gamut += array(
		"doFootnotes"        => 5,
		"doAbbreviations"    => 70,
		);

	parent::Markdown_Parser();
}
# Extra hashes used during extra transformations.
var $footnotes = array ();
var $footnotes_ordered = array ();
var $abbr_desciptions = array ();
var $abbr_matches = array ();
2007-08-21 09:54:49 +00:00
# Status flag to avoid invalid nesting.
var $in_footnote = false ;
2007-02-08 11:23:25 +00:00
function transform($text) {
	#
	# Added clear to the new $html_hashes, reordered `hashHTMLBlocks` before
	# blank line stripping and added extra parameter to `runBlockGamut`.
	#
	# Resets the footnote and abbreviation tables, then defers to the parent
	# implementation and returns its result.
	#
	# Clear the global hashes. If we don't clear these, you get conflicts
	# from other articles when generating a page which contains more than
	# one article (e.g. an index page that shows the N most recent
	# articles):
	$this->footnotes = array();
	$this->footnotes_ordered = array();
	$this->abbr_desciptions = array();
	$this->abbr_matches = array();

	return parent::transform($text);
}
### HTML Block Parser ###
# Tags that are always treated as block tags:
var $block_tags = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend' ;
# Tags treated as block tags only if the opening tag is alone on its line:
var $context_block_tags = 'script|noscript|math|ins|del' ;
# Tags where markdown="1" default to span mode:
var $contain_span_tags = 'p|h[1-6]|li|dd|dt|td|th|legend|address' ;
# Tags which must not have their contents modified, no matter where
# they appear:
var $clean_tags = 'script|math' ;
# Tags that do not need to be closed.
var $auto_close_tags = 'hr|img' ;
function hashHTMLBlocks($text) {
	#
	# Hashify HTML Blocks and "clean tags".
	#
	# Only block-level HTML tags (headers, lists, tables...) are concerned:
	# paragraphs wrapped in non-block-level tags such as anchors, phrase
	# emphasis and spans must still get their <p> tags. The list of tags
	# looked for is hard-coded.
	#
	# The work is done by two mutually recursive functions:
	# _hashHTMLBlocks_inMarkdown calls _hashHTMLBlocks_inHTML whenever it
	# encounters a block tag, and _hashHTMLBlocks_inHTML calls back
	# _hashHTMLBlocks_inMarkdown when it finds the markdown="1" attribute
	# within a tag, to handle the Markdown syntax inside that tag.
	#
	# Start with the HTML-in-Markdown hasher and keep only the processed
	# text, which is the first element of the pair it returns.
	$result = $this->_hashHTMLBlocks_inMarkdown($text);
	return $result[0];
}
function _hashHTMLBlocks_inMarkdown($text, $indent = 0,
									$enclosing_tag = '', $span = false)
{
	#
	# Parse markdown text, calling _hashHTMLBlocks_inHTML for block tags.
	#
	# *   $indent is the number of space to be ignored when checking for code
	#     blocks. This is important because if we don't take the indent into
	#     account, something like this (which looks right) won't work as expected:
	#
	#     <div>
	#         <div markdown="1">
	#         Hello World.  <-- Is this a Markdown code block or text?
	#         </div>  <-- Is this a Markdown code block or a real tag?
	#     <div>
	#
	#     If you don't like this, just don't indent the tag on which
	#     you apply the markdown="1" attribute.
	#
	# *   If $enclosing_tag is not empty, stops at the first unmatched closing
	#     tag with that name. Nested tags supported.
	#
	# *   If $span is true, text inside must be treated as span. So any double
	#     newline will be replaced by a single newline so that it does not create
	#     paragraphs.
	#
	# Returns an array of that form: ( processed text , remaining text )
	#
	if ($text === '') return array('', '');

	# Regex to check for the presence of newlines around a block tag.
	$newline_match_before = '/(?:^\n?|\n\n)*$/';
	$newline_match_after =
		'{
			^						# Start of text following the tag.
			(?:[ ]*<!--.*?-->)?		# Optional comment.
			[ ]*\n					# Must be followed by newline.
		}xs';

	# Regex to match any tag.
	$block_tag_match =
		'{
			(					# $2: Capture whole tag.
				</?					# Any opening or closing tag.
					(?:				# Tag name.
						'.$this->block_tags.'			|
						'.$this->context_block_tags.'	|
						'.$this->clean_tags.'        	|
						(?!\s)'.$enclosing_tag.'
					)
					\s*				# Whitespace.
					(?>
						".*?"		|	# Double quotes (can contain `>`)
						\'.*?\'   	|	# Single quotes (can contain `>`)
						.+?				# Anything but quotes and `>`.
					)*?
				>					# End of tag.
			|
				<!--    .*?     -->	# HTML Comment
			|
				<\?.*?\?> | <%.*?%>	# Processing instruction
			|
				<!\[CDATA\[.*?\]\]>	# CData Block
			)
		}xs';

	$depth = 0;		# Current depth inside the tag tree.
	$parsed = "";	# Parsed text that will be returned.

	#
	# Loop through every tag until we find the closing tag of the parent
	# or loop until reaching the end of text if no parent tag specified.
	#
	do {
		#
		# Split the text using the first $block_tag_match pattern found.
		# Text before pattern will be first in the array, text after
		# pattern will be at the end, and between will be any catches made
		# by the pattern.
		#
		$parts = preg_split($block_tag_match, $text, 2,
							PREG_SPLIT_DELIM_CAPTURE);

		# If in Markdown span mode, add a empty-string span-level hash
		# after each newline to prevent triggering any block element.
		if ($span) {
			$void = $this->hashPart("", ':');
			$newline = "$void\n";
			$parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
		}

		$parsed .= $parts[0]; # Text before current tag.

		# If end of $text has been reached. Stop loop.
		if (count($parts) < 3) {
			$text = "";
			break;
		}

		$tag  = $parts[1]; # Tag to handle.
		$text = $parts[2]; # Remaining text after current tag.

		#
		# Check for: Tag inside code block or span
		#
		if (# Find current paragraph
			preg_match('/(?>^\n?|\n\n)((?>.+\n?)*?)$/', $parsed, $matches) &&
			(
			# Then match in it either a code block...
			preg_match('/^ {' . ($indent + 4) . '}.*(?>\n {' . ($indent + 4) . '}.*)*' .
						'(?!\n)$/', $matches[1]) ||
			# ...or unbalanced code span markers. (the regex matches balanced)
			!preg_match('/^(?>[^`]+|(`+)(?>[^`]+|(?!\1[^`])`)*?\1(?!`))*$/s',
						 $matches[1])
			))
		{
			# Tag is in code block or span and may not be a tag at all. So we
			# simply skip the first char (should be a `<`).
			$parsed .= $tag[0];
			$text = substr($tag, 1) . $text; # Put back $tag minus first char.
		}
		#
		# Check for: Opening Block level tag or
		#            Opening Content Block tag (like ins and del)
		#               used as a block tag (tag is alone on its line).
		#
		else if (preg_match('{^<(?:' . $this->block_tags . ')\b}', $tag) ||
			(	preg_match('{^<(?:' . $this->context_block_tags . ')\b}', $tag) &&
				preg_match($newline_match_before, $parsed) &&
				preg_match($newline_match_after, $text)	)
			)
		{
			# Need to parse tag and following text using the HTML parser.
			list($block_text, $text) =
				$this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);

			# Make sure it stays outside of any paragraph by adding newlines.
			$parsed .= "\n\n$block_text\n\n";
		}
		#
		# Check for: Clean tag (like script, math)
		#            HTML Comments, processing instructions.
		#
		else if (preg_match('{^<(?:' . $this->clean_tags . ')\b}', $tag) ||
			$tag[1] == '!' || $tag[1] == '?')
		{
			# Need to parse tag and following text using the HTML parser.
			# (don't check for markdown attribute)
			list($block_text, $text) =
				$this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);

			$parsed .= $block_text;
		}
		#
		# Check for: Tag with same name as enclosing tag.
		#
		else if ($enclosing_tag !== '' &&
			# Same name as enclosing tag.
			preg_match('{^</?(?:' . $enclosing_tag . ')\b}', $tag))
		{
			#
			# Increase/decrease nested tag count. A self-closed tag
			# (ending with "/>") changes nothing.
			#
			if ($tag[1] == '/')						$depth--;
			else if ($tag[strlen($tag) - 2] != '/')	$depth++;

			if ($depth < 0) {
				#
				# Going out of parent element. Clean up and break so we
				# return to the calling function.
				#
				$text = $tag . $text;
				break;
			}

			$parsed .= $tag;
		}
		else {
			$parsed .= $tag;
		}
	} while ($depth >= 0);

	return array($parsed, $text);
}
function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
	#
	# Parse HTML, calling _hashHTMLBlocks_inMarkdown for block tags.
	#
	# *   Calls $hash_method to convert any blocks.
	# *   Stops when the first opening tag closes.
	# *   $md_attr indicate if the use of the `markdown="1"` attribute is allowed.
	#     (it is not inside clean tags)
	#
	# Returns an array of that form: ( processed text , remaining text )
	#
	if ($text === '') return array('', '');

	# Regex to match `markdown` attribute inside of a tag.
	$markdown_attr_match = '
		{
			\s*			# Eat whitespace before the `markdown` attribute
			markdown
			\s*=\s*
			(?:
				(["\'])		# $1: quote delimiter
				(.*?)		# $2: attribute value
				\1			# matching delimiter
			|
				([^\s>]*)	# $3: unquoted attribute value
			)
			()				# $4: make $3 always defined (avoid warnings)
		}xs';

	# Regex to match any tag.
	$tag_match = '{
			(					# $2: Capture whole tag.
				</?					# Any opening or closing tag.
					[\w:$]+			# Tag name.
					\s*				# Whitespace.
					(?>
						".*?"		|	# Double quotes (can contain `>`)
						\'.*?\'   	|	# Single quotes (can contain `>`)
						.+?				# Anything but quotes and `>`.
					)*?
				>					# End of tag.
			|
				<!--    .*?     -->	# HTML Comment
			|
				<\?.*?\?> | <%.*?%>	# Processing instruction
			|
				<!\[CDATA\[.*?\]\]>	# CData Block
			)
		}xs';

	$original_text = $text;		# Save original text in case of failure.

	$depth		= 0;	# Current depth inside the tag tree.
	$block_text	= "";	# Temporary text holder for current text.
	$parsed		= "";	# Parsed text that will be returned.

	#
	# Get the name of the starting tag. It stays empty when the text does
	# not start with a tag name.
	#
	$base_tag_name = '';
	if (preg_match('/^<([\w:$]*)\b/', $text, $matches))
		$base_tag_name = $matches[1];

	#
	# Loop through every tag until we find the corresponding closing tag.
	#
	do {
		#
		# Split the text using the first $tag_match pattern found.
		# Text before pattern will be first in the array, text after
		# pattern will be at the end, and between will be any catches made
		# by the pattern.
		#
		$parts = preg_split($tag_match, $text, 2, PREG_SPLIT_DELIM_CAPTURE);

		if (count($parts) < 3) {
			#
			# End of $text reached with unbalanced tag(s).
			# In that case, we return original text unchanged and pass the
			# first character as filtered to prevent an infinite loop in the
			# parent function.
			#
			return array($original_text[0], substr($original_text, 1));
		}

		$block_text .= $parts[0]; # Text before current tag.
		$tag         = $parts[1]; # Tag to handle.
		$text        = $parts[2]; # Remaining text after current tag.

		#
		# Check for: Auto-close tag (like <hr/>)
		#			 Comments and Processing Instructions.
		#
		if (preg_match('{^</?(?:' . $this->auto_close_tags . ')\b}', $tag) ||
			$tag[1] == '!' || $tag[1] == '?')
		{
			# Just add the tag to the block as if it was text.
			$block_text .= $tag;
		}
		else {
			#
			# Increase/decrease nested tag count. Only do so if
			# the tag's name match base tag's.
			#
			if (preg_match('{^</?' . $base_tag_name . '\b}', $tag)) {
				if ($tag[1] == '/')						$depth--;
				else if ($tag[strlen($tag) - 2] != '/')	$depth++;
			}

			#
			# Check for `markdown="1"` attribute and handle it. The value
			# must be exactly "1", "block" or "span"; the alternation is
			# grouped so that both anchors apply to every alternative.
			#
			if ($md_attr &&
				preg_match($markdown_attr_match, $tag, $attr_m) &&
				preg_match('/^(?:1|block|span)$/', $attr_m[2] . $attr_m[3]))
			{
				# Remove `markdown` attribute from opening tag.
				$tag = preg_replace($markdown_attr_match, '', $tag);

				# Check if text inside this tag must be parsed in span mode.
				$this->mode = $attr_m[2] . $attr_m[3];
				$span_mode = $this->mode == 'span' ||
					($this->mode != 'block' &&
					 preg_match('{^<(?:' . $this->contain_span_tags . ')\b}', $tag));

				# Calculate indent before tag.
				preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches);
				$indent = strlen($matches[1]);

				# End preceding block with this tag.
				$block_text .= $tag;
				$parsed .= $this->$hash_method($block_text);

				# Get enclosing tag name for the ParseMarkdown function.
				preg_match('/^<([\w:$]*)\b/', $tag, $matches);
				$tag_name = $matches[1];

				# Parse the content using the HTML-in-Markdown parser.
				list($block_text, $text)
					= $this->_hashHTMLBlocks_inMarkdown($text, $indent,
													$tag_name, $span_mode);

				# Outdent markdown text.
				if ($indent > 0) {
					$block_text = preg_replace('/^[ ]{1,' . $indent . '}/m', "",
												$block_text);
				}

				# Append tag content to parsed text.
				if (!$span_mode)	$parsed .= "\n\n$block_text\n\n";
				else				$parsed .= "$block_text";

				# Start over a new block.
				$block_text = "";
			}
			else $block_text .= $tag;
		}

	} while ($depth > 0);

	#
	# Hash last block text that wasn't processed inside the loop.
	#
	$parsed .= $this->$hash_method($block_text);

	return array($parsed, $text);
}
2006-02-25 16:48:46 +00:00
2007-02-08 11:23:25 +00:00
function hashClean($text) {
	#
	# Called whenever a tag must be hashed when a function insert a "clean" tag
	# in $text, it pass through this function and is automatically escaped,
	# blocking invalid nested overlap.
	#
	# Hashes $text with hashPart using the 'C' boundary.
	#
	return $this->hashPart($text, 'C');
}
/**
 * Convert Setext-style and atx-style headers, with optional id attributes.
 *
 * Redefined to add id attribute support:
 *
 *     Header 1  {#header1}
 *     ========
 *
 *     ## Header 2 with closing hashes ##  {#header2}
 *
 * @param string $text Text to process.
 * @return string Text with each header replaced by what the header
 *                callbacks return.
 */
function doHeaders($text) {
    # Setext-style headers:
    #     Header 1  {#header1}
    #     ========
    #
    #     Header 2  {#header2}
    #     --------
    #
    $text = preg_replace_callback(
        '{
            (^.+?)                              # $1: Header text
            (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})?    # $2: Id attribute
            [ ]*\n(=+|-+)[ ]*\n+                # $3: Header footer
        }mx',
        array(&$this, '_doHeaders_callback_setext'), $text);

    # atx-style headers:
    #   # Header 1        {#header1}
    #   ## Header 2       {#header2}
    #   ## Header 2 with closing hashes ##  {#header3}
    #   ...
    #   ###### Header 6   {#header2}
    #
    $text = preg_replace_callback('{
            ^(\#{1,6})  # $1 = string of #\'s
            [ ]*
            (.+?)       # $2 = Header text
            [ ]*
            \#*         # optional closing #\'s (not counted)
            (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? # $3 = id attribute
            [ ]*
            \n+
        }xm',
        array(&$this, '_doHeaders_callback_atx'), $text);

    return $text;
}
/**
 * Build the id attribute string for a header tag.
 *
 * @param string|null $attr Header id captured from "{#id}", or null/"" when
 *                          the header has no id.
 * @return string ' id="..."' (with leading space), or "" when there is no id.
 */
function _doHeaders_attr($attr) {
    # Compare explicitly instead of using empty(): the id pattern allows
    # digits, and empty("0") is true, which would silently drop {#0}.
    if ($attr === null || $attr === '') return "";
    return " id=\"$attr\"";
}
2007-09-17 12:37:51 +00:00
/**
 * Callback for Setext-style headers matched by doHeaders().
 *
 * @param array $matches [1] header text, [2] optional id, [3] underline
 *                       made of "=" (level 1) or "-" (level 2).
 * @return string Hashed header block surrounded by newlines.
 */
function _doHeaders_callback_setext($matches) {
    # Square-bracket offset: the curly-brace form ($str{0}) was deprecated
    # in PHP 7.4 and removed in PHP 8.0.
    $level = $matches[3][0] == '=' ? 1 : 2;
    $id    = isset($matches[2]) ? $matches[2] : '';
    $attr  = $this->_doHeaders_attr($id);
    $block = "<h$level$attr>" . $this->runSpanGamut($matches[1]) . "</h$level>";
    return "\n" . $this->hashBlock($block) . "\n\n";
}
/**
 * Callback for atx-style headers matched by doHeaders().
 *
 * @param array $matches [1] run of "#" characters (its length is the header
 *                       level), [2] header text, [3] optional id.
 * @return string Hashed header block surrounded by newlines.
 */
function _doHeaders_callback_atx($matches) {
    $level = strlen($matches[1]);
    # The id group is the last, optional group, so it may be absent from
    # $matches altogether when the header has no {#id}.
    $id    = isset($matches[3]) ? $matches[3] : '';
    $attr  = $this->_doHeaders_attr($id);
    $block = "<h$level$attr>" . $this->runSpanGamut($matches[2]) . "</h$level>";
    return "\n" . $this->hashBlock($block) . "\n\n";
}
/**
 * Form HTML tables from pipe-delimited table syntax.
 *
 * Two passes are made: first for tables whose rows start with a pipe, then
 * for tables without a leading pipe.
 *
 * @param string $text Text to process.
 * @return string Text with each table replaced by what the table callbacks
 *                return.
 */
function doTables($text) {
    $less_than_tab = $this->tab_width - 1;
    #
    # Find tables with leading pipe.
    #
    #   | Header 1 | Header 2
    #   | -------- | --------
    #   | Cell 1   | Cell 2
    #   | Cell 3   | Cell 4
    #
    $text = preg_replace_callback('
        {
            ^                           # Start of a line
            [ ]{0,'.$less_than_tab.'}   # Allowed whitespace.
            [|]                         # Optional leading pipe (present)
            (.+) \n                     # $1: Header row (at least one pipe)

            [ ]{0,'.$less_than_tab.'}   # Allowed whitespace.
            [|] ([ ]*[-:]+[-| :]*) \n   # $2: Header underline

            (                           # $3: Cells
                (?>
                    [ ]*                # Allowed whitespace.
                    [|] .* \n           # Row content.
                )*
            )
            (?=\n|\Z)                   # Stop at final double newline.
        }xm',
        array(&$this, '_doTable_leadingPipe_callback'), $text);

    #
    # Find tables without leading pipe.
    #
    #   Header 1 | Header 2
    #   -------- | --------
    #   Cell 1   | Cell 2
    #   Cell 3   | Cell 4
    #
    $text = preg_replace_callback('
        {
            ^                           # Start of a line
            [ ]{0,'.$less_than_tab.'}   # Allowed whitespace.
            (\S.*[|].*) \n              # $1: Header row (at least one pipe)

            [ ]{0,'.$less_than_tab.'}   # Allowed whitespace.
            ([-:]+[ ]*[|][-| :]*) \n    # $2: Header underline

            (                           # $3: Cells
                (?>
                    .* [|] .* \n        # Row content
                )*
            )
            (?=\n|\Z)                   # Stop at final double newline.
        }xm',
        array(&$this, '_doTable_callback'), $text);

    return $text;
}
/**
 * Callback for tables written with a leading pipe on every row.
 *
 * Strips the leading pipe (and any spaces before it) from each content row,
 * then hands the normalised match over to _doTable_callback().
 *
 * @param array $matches [0] whole match, [1] header row, [2] header
 *                       underline, [3] content rows.
 * @return mixed Result of _doTable_callback().
 */
function _doTable_leadingPipe_callback($matches) {
    $rows_without_pipe = preg_replace('/^ *[|]/m', '', $matches[3]);
    $normalized = array($matches[0], $matches[1], $matches[2], $rows_without_pipe);
    return $this->_doTable_callback($normalized);
}
2007-02-08 11:23:25 +00:00
/**
 * Build the HTML for one table.
 *
 * @param array $matches [1] header row, [2] header underline (carries the
 *                       column alignment markers), [3] content rows.
 * @return string Hashed <table> block followed by a newline.
 */
function _doTable_callback($matches) {
    $head      = $matches[1];
    $underline = $matches[2];
    $content   = $matches[3];

    # Remove any trailing pipes for each line.
    $head      = preg_replace('/[|] *$/m', '', $head);
    $underline = preg_replace('/[|] *$/m', '', $underline);
    $content   = preg_replace('/[|] *$/m', '', $content);

    # Read alignment from header underline.
    $attr = array();
    $separators = preg_split('/ *[|] */', $underline);
    foreach ($separators as $n => $s) {
        if (preg_match('/^ *-+: *$/', $s))       $attr[$n] = ' align="right"';
        else if (preg_match('/^ *:-+: *$/', $s)) $attr[$n] = ' align="center"';
        else if (preg_match('/^ *:-+ *$/', $s))  $attr[$n] = ' align="left"';
        else                                     $attr[$n] = '';
    }

    # Parsing span elements, including code spans, character escapes,
    # and inline HTML tags, so that pipes inside those get ignored.
    $head      = $this->parseSpan($head);
    $headers   = preg_split('/ *[|] */', $head);
    $col_count = count($headers);

    # The underline may define fewer columns than the header row; pad so
    # that every column index used below has an (empty) alignment entry.
    $attr = array_pad($attr, $col_count, '');

    # Write column headers.
    $text  = "<table>\n";
    $text .= "<thead>\n";
    $text .= "<tr>\n";
    foreach ($headers as $n => $header)
        $text .= "  <th$attr[$n]>" . $this->runSpanGamut(trim($header)) . "</th>\n";
    $text .= "</tr>\n";
    $text .= "</thead>\n";

    # Split content by row.
    $rows = explode("\n", trim($content, "\n"));

    $text .= "<tbody>\n";
    foreach ($rows as $row) {
        # Parsing span elements, including code spans, character escapes,
        # and inline HTML tags, so that pipes inside those get ignored.
        $row = $this->parseSpan($row);

        # Split row by cell; surplus cells are folded into the last column
        # and missing cells are filled with empty strings.
        $row_cells = preg_split('/ *[|] */', $row, $col_count);
        $row_cells = array_pad($row_cells, $col_count, '');

        $text .= "<tr>\n";
        foreach ($row_cells as $n => $cell)
            $text .= "  <td$attr[$n]>" . $this->runSpanGamut(trim($cell)) . "</td>\n";
        $text .= "</tr>\n";
    }
    $text .= "</tbody>\n";
    $text .= "</table>";

    return $this->hashBlock($text) . "\n";
}
2007-02-08 11:23:25 +00:00
/**
 * Form HTML definition lists.
 *
 * @param string $text Text to process.
 * @return string Text with each definition list replaced by what
 *                _doDefLists_callback() returns.
 */
function doDefLists($text) {
    $less_than_tab = $this->tab_width - 1;

    # Re-usable pattern to match any entire dl list:
    $whole_list = '(?>
        (                               # $1 = whole list
          (                             # $2
            [ ]{0,'.$less_than_tab.'}
            ((?>.*\S.*\n)+)             # $3 = defined term
            \n?
            [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
          )
          (?s:.+?)
          (                             # $4
              \z
            |
              \n{2,}
              (?=\S)
              (?!                       # Negative lookahead for another term
                [ ]{0,'.$less_than_tab.'}
                (?: \S.*\n )+?          # defined term
                \n?
                [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
              )
              (?!                       # Negative lookahead for another definition
                [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
              )
          )
        )
    )'; // mx

    $text = preg_replace_callback('{
            (?:(?<=\n\n)|\A\n?)
            '.$whole_list.'
        }mx',
        array(&$this, '_doDefLists_callback'), $text);

    return $text;
}
2007-02-08 11:23:25 +00:00
/**
 * Callback for a whole definition list matched by doDefLists().
 *
 * @param array $matches [1] is the complete source text of the list.
 * @return string Hashed <dl> block followed by a blank line.
 */
function _doDefLists_callback($matches) {
    $list = $matches[1];

    # Convert the terms and definitions, then wrap them in a <dl> element.
    $result = trim($this->processDefListItems($list));
    $result = "<dl>\n" . $result . "\n</dl>";
    return $this->hashBlock($result) . "\n\n";
}
2006-02-25 16:48:46 +00:00
2007-02-08 11:23:25 +00:00
/**
 * Process the contents of a single definition list, splitting it into
 * individual term and definition list items.
 *
 * @param string $list_str Source text of one definition list.
 * @return string Text with terms and definitions replaced by what the
 *                _dt and _dd callbacks return.
 */
function processDefListItems($list_str) {
    $less_than_tab = $this->tab_width - 1;

    # Trim trailing blank lines:
    $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

    # Process definition terms.
    # The lookahead uses $less_than_tab (rather than a hard-coded 3) so it
    # accepts the same indentation as every other definition-mark pattern;
    # with the default tab width of 4 the two are identical.
    $list_str = preg_replace_callback('{
        (?:\n\n+|\A\n?)                 # leading line
        (                               # definition terms = $1
            [ ]{0,'.$less_than_tab.'}   # leading whitespace
            (?![:][ ]|[ ])              # negative lookahead for a definition
                                        #   mark (colon) or more whitespace.
            (?: \S.* \n)+?              # actual term (not whitespace).
        )
        (?=\n?[ ]{0,'.$less_than_tab.'}:[ ])    # lookahead for following line feed
                                        #   with a definition mark.
        }xm',
        array(&$this, '_processDefListItems_callback_dt'), $list_str);

    # Process actual definitions.
    $list_str = preg_replace_callback('{
        \n(\n+)?                        # leading line = $1
        [ ]{0,'.$less_than_tab.'}       # whitespace before colon
        [:][ ]+                         # definition mark (colon)
        ((?s:.+?))                      # definition text = $2
        (?= \n+                         # stop at next definition mark,
            (?:                         # next term or end of text
                [ ]{0,'.$less_than_tab.'} [:][ ]    |
                <dt> | \z
            )
        )
        }xm',
        array(&$this, '_processDefListItems_callback_dd'), $list_str);

    return $list_str;
}
2007-02-08 11:23:25 +00:00
/**
 * Callback turning a group of definition terms into <dt> elements.
 *
 * @param array $matches [1] holds one term per line.
 * @return string One "\n<dt>...</dt>" per term, followed by a newline.
 */
function _processDefListItems_callback_dt($matches) {
    $terms = explode("\n", trim($matches[1]));
    $text = '';
    foreach ($terms as $term) {
        $term = $this->runSpanGamut(trim($term));
        $text .= "\n<dt>" . $term . "</dt>";
    }
    return $text . "\n";
}
2007-02-08 11:23:25 +00:00
/**
 * Callback turning one definition into a <dd> element.
 *
 * A definition preceded by a blank line, or containing one, is run through
 * the block gamut; otherwise it is run through the span gamut only.
 *
 * @param array $matches [1] blank line(s) preceding the definition (may be
 *                       empty), [2] definition text.
 * @return string "\n<dd>...</dd>\n"
 */
function _processDefListItems_callback_dd($matches) {
    $leading_line = $matches[1];
    $def          = $matches[2];

    if ($leading_line || preg_match('/\n{2,}/', $def)) {
        $def = $this->runBlockGamut($this->outdent($def . "\n\n"));
        $def = "\n" . $def . "\n";
    }
    else {
        $def = rtrim($def);
        $def = $this->runSpanGamut($this->outdent($def));
    }

    return "\n<dd>" . $def . "</dd>\n";
}
2004-07-30 04:02:58 +00:00
2007-02-08 11:23:25 +00:00
/**
 * Convert strong and regular emphasis markers.
 *
 * Redefined to change emphasis-by-underscore behaviour so that it does not
 * work in the middle of a word.
 *
 * @param string $text Text to process.
 * @return string Text with emphasis spans replaced by what the strong and
 *                em callbacks return.
 */
function doItalicsAndBold($text) {
    # <strong> must go first:
    $text = preg_replace_callback(array(
        '{
            (                       # $1: Marker
                (?<![a-zA-Z0-9])    # Not preceded by alphanum
                (?<!__)             #   or by two marker chars.
                __
            )
            (?=\S)                  # Not followed by whitespace
            (?!__)                  #   or two others marker chars.
            (                       # $2: Content
                (?>
                    [^_]+?          # Anything not em markers.
                |
                                    # Balance any regular _ emphasis inside.
                    (?<![a-zA-Z0-9]) _ (?=\S) (.+?)
                    (?<=\S) _ (?![a-zA-Z0-9])
                |
                    _+              # Allow unbalanced as last resort.
                )+?
            )
            (?<=\S) __              # End mark not preceded by whitespace.
            (?![a-zA-Z0-9])         # Not followed by alphanum
            (?!__)                  #   or two others marker chars.
        }sx',
        '{
            ( (?<!\*\*) \*\* )      # $1: Marker (not preceded by two *)
            (?=\S)                  # Not followed by whitespace
            (?!\1)                  #   or two others marker chars.
            (                       # $2: Content
                (?>
                    [^*]+?          # Anything not em markers.
                |
                                    # Balance any regular * emphasis inside.
                    \* (?=\S) (.+?) (?<=\S) \*
                |
                    \*              # Allow unbalanced as last resort.
                )+?
            )
            (?<=\S) \*\*            # End mark not preceded by whitespace.
        }sx',
        ),
        array(&$this, '_doItalicAndBold_strong_callback'), $text);

    # Then <em>:
    $text = preg_replace_callback(array(
        '{ ( (?<![a-zA-Z0-9])(?<!_)_ ) (?=\S) (?! \1) (.+?) (?<=\S) \1(?![a-zA-Z0-9]) }sx',
        '{ ( (?<!\*)\* ) (?=\S) (?! \1) (.+?) (?<=\S)(?<!\s\*) \1 }sx',
        ),
        array(&$this, '_doItalicAndBold_em_callback'), $text);

    return $text;
}
2004-07-30 04:02:58 +00:00
2007-02-08 11:23:25 +00:00
/**
 * Wrap paragraphs in <p> tags and restore hashed HTML.
 *
 * @param string $text String to process with HTML <p> tags.
 * @return string Text with paragraphs wrapped and hashes passed through
 *                unhash().
 */
function formParagraphs($text) {
    # Strip leading and trailing lines:
    $text = preg_replace('/\A\n+|\n+\z/', '', $text);

    $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

    #
    # Wrap <p> tags and unhashify HTML blocks
    #
    foreach ($grafs as $key => $value) {
        $value = trim($this->runSpanGamut($value));

        # Check if this should be enclosed in a paragraph.
        # Clean tag hashes & block tag hashes are left alone.
        $is_p = !preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $value);

        if ($is_p) {
            $value = "<p>$value</p>";
        }
        $grafs[$key] = $value;
    }

    # Join grafs in one text, then unhash HTML tags.
    $text = implode("\n\n", $grafs);

    # Finish by removing any tag hashes still present in $text.
    $text = $this->unhash($text);

    return $text;
}
### Footnotes
/**
 * Strip footnote definitions from the text.
 *
 * Each definition is handed to _stripFootnotes_callback(), which stores its
 * content and removes it from the text.
 *
 * @param string $text Text to process.
 * @return string Text without the footnote definitions.
 */
function stripFootnotes($text) {
    $less_than_tab = $this->tab_width - 1;

    # Footnote definitions are in the form: [^id]: footnote text
    $text = preg_replace_callback('{
        ^[ ]{0,'.$less_than_tab.'}\[\^(.+?)\][ ]?:  # note_id = $1
          [ ]*
          \n?                   # maybe *one* newline
        (                       # text = $2 (no blank lines allowed)
            (?:
                .+              # actual text
            |
                \n              # newlines but
                (?!\[\^.+?\]:\s)# negative lookahead for footnote marker.
                (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed
                                # by non-indented content
            )*
        )
        }xm',
        array(&$this, '_stripFootnotes_callback'),
        $text);
    return $text;
}
/**
 * Callback storing one footnote definition.
 *
 * The outdented footnote text is recorded in $this->footnotes under the note
 * id prefixed with $this->fn_id_prefix.
 *
 * @param array $matches [1] note id, [2] footnote text.
 * @return string Empty string, so the definition disappears from the text.
 */
function _stripFootnotes_callback($matches) {
    $key = $this->fn_id_prefix . $matches[1];
    $body = $this->outdent($matches[2]);
    $this->footnotes[$key] = $body;
    return '';
}
2004-07-30 04:02:58 +00:00
2007-02-08 11:23:25 +00:00
/**
 * Replace footnote references [^id] with a placeholder token.
 *
 * The token has the form F\x1Afn:id\x1A: and is the pattern that
 * appendFootnotes() searches for. Nothing is replaced while inside a
 * footnote or an anchor.
 *
 * @param string $text Text to process.
 * @return string Text with footnote references replaced by tokens.
 */
function doFootnotes($text) {
    if (!$this->in_footnote && !$this->in_anchor) {
        $text = preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
    }
    return $text;
}
2004-07-30 04:02:58 +00:00
2006-02-25 16:48:46 +00:00
2007-02-08 11:23:25 +00:00
/**
 * Resolve footnote tokens and append the footnote list to the text.
 *
 * Tokens are replaced through _appendFootnotes_callback(), which also fills
 * $this->footnotes_ordered. If any footnote was referenced, a
 * <div class="footnotes"> block with one <li> per footnote is appended.
 *
 * @param string $text Text to process.
 * @return string Text with footnote markers resolved and the list appended.
 */
function appendFootnotes($text) {
    $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
        array(&$this, '_appendFootnotes_callback'), $text);

    if (!empty($this->footnotes_ordered)) {
        $text .= "\n\n";
        $text .= "<div class=\"footnotes\">\n";
        $text .= "<hr" . MARKDOWN_EMPTY_ELEMENT_SUFFIX . "\n";
        $text .= "<ol>\n\n";

        $attr = " rev=\"footnote\"";
        if ($this->fn_backlink_class != "") {
            $class = $this->fn_backlink_class;
            $class = $this->encodeAmpsAndAngles($class);
            # Escape double quotes so the value cannot end the attribute.
            $class = str_replace('"', '&quot;', $class);
            $attr .= " class=\"$class\"";
        }
        if ($this->fn_backlink_title != "") {
            $title = $this->fn_backlink_title;
            $title = $this->encodeAmpsAndAngles($title);
            $title = str_replace('"', '&quot;', $title);
            $attr .= " title=\"$title\"";
        }
        $num = 0;

        # Flag read by doFootnotes(): no footnote references are converted
        # while footnote bodies are being rendered.
        $this->in_footnote = true;

        foreach ($this->footnotes_ordered as $note_id => $footnote) {
            $footnote .= "\n"; # Need to append newline before parsing.
            $footnote = $this->runBlockGamut("$footnote\n");

            # "%%" in the class/title is replaced by the footnote number.
            $attr2 = str_replace("%%", (string) ++$num, $attr);

            # Add backlink to last paragraph; create new paragraph if needed.
            $backlink = "<a href=\"#fnref:$note_id\"$attr2>&#8617;</a>";
            if (preg_match('{</p>$}', $footnote)) {
                $footnote = substr($footnote, 0, -4) . "&#160;$backlink</p>";
            } else {
                $footnote .= "\n\n<p>$backlink</p>";
            }

            $text .= "<li id=\"fn:$note_id\">\n";
            $text .= $footnote . "\n";
            $text .= "</li>\n\n";
        }

        $this->in_footnote = false;

        $text .= "</ol>\n";
        $text .= "</div>";
    }
    return $text;
}
/**
 * Callback turning one footnote token into a footnote marker.
 *
 * @param array $matches [1] is the footnote id found in the token.
 * @return string <sup> marker linking to the footnote, or the original
 *                "[^id]" text when no unused footnote has that id.
 */
function _appendFootnotes_callback($matches) {
    $node_id = $this->fn_id_prefix . $matches[1];

    # Create footnote marker only if it has a corresponding footnote *and*
    # the footnote hasn't been used by another marker.
    if (isset($this->footnotes[$node_id])) {
        # Transfer footnote content to the ordered list.
        $this->footnotes_ordered[$node_id] = $this->footnotes[$node_id];
        unset($this->footnotes[$node_id]);

        $num = count($this->footnotes_ordered);
        $attr = " rel=\"footnote\"";
        if ($this->fn_link_class != "") {
            $class = $this->fn_link_class;
            $class = $this->encodeAmpsAndAngles($class);
            # Escape double quotes so the value cannot end the attribute.
            $class = str_replace('"', '&quot;', $class);
            $attr .= " class=\"$class\"";
        }
        if ($this->fn_link_title != "") {
            $title = $this->fn_link_title;
            $title = $this->encodeAmpsAndAngles($title);
            $title = str_replace('"', '&quot;', $title);
            $attr .= " title=\"$title\"";
        }
        # "%%" in the class/title is replaced by the footnote number.
        $attr = str_replace("%%", (string) $num, $attr);

        return
            "<sup id=\"fnref:$node_id\">" .
            "<a href=\"#fn:$node_id\"$attr>$num</a>" .
            "</sup>";
    }

    return "[^" . $matches[1] . "]";
}
2007-02-08 11:23:25 +00:00
2006-02-25 16:48:46 +00:00
2007-02-08 11:23:25 +00:00
### Abbreviations ###
2006-02-25 16:48:46 +00:00
2007-02-08 11:23:25 +00:00
/**
 * Strip abbreviation definitions from the text.
 *
 * Each definition is handed to _stripAbbreviations_callback(), which stores
 * it and removes it from the text.
 *
 * @param string $text Text to process.
 * @return string Text without the abbreviation definitions.
 */
function stripAbbreviations($text) {
    $less_than_tab = $this->tab_width - 1;

    # Abbreviation definitions are in the form: *[id]: description
    $text = preg_replace_callback('{
        ^[ ]{0,'.$less_than_tab.'}\*\[(.+?)\][ ]?:  # abbr_id = $1
        (.*)                    # text = $2 (no blank lines allowed)
        }xm',
        array(&$this, '_stripAbbreviations_callback'),
        $text);
    return $text;
}
2007-02-08 11:23:25 +00:00
/**
 * Callback storing one abbreviation definition.
 *
 * Appends the regex-quoted abbreviation to $this->abbr_matches and records
 * the trimmed description in $this->abbr_desciptions under the abbreviation.
 *
 * @param array $matches [1] abbreviation, [2] description.
 * @return string Empty string, so the definition disappears from the text.
 */
function _stripAbbreviations_callback($matches) {
    list(, $word, $description) = $matches;
    $this->abbr_matches[] = preg_quote($word);
    $this->abbr_desciptions[$word] = trim($description);
    return '';
}
2005-04-11 14:08:01 +00:00
2007-02-08 11:23:25 +00:00
/**
 * Find defined abbreviations in the text and pass each occurrence to
 * _doAbbreviations_callback().
 *
 * An occurrence only counts when it is not adjacent to a word character or
 * to the \x1A byte.
 *
 * @param string $text Text to process.
 * @return string Processed text; returned untouched when no abbreviation
 *                has been defined.
 */
function doAbbreviations($text) {
    if (!$this->abbr_matches) {
        return $text;
    }

    // cannot use the /x modifier because abbr_matches may
    // contain spaces:
    $alternatives = implode('|', $this->abbr_matches);
    $pattern = '{' . '(?<![\w\x1A])' . '(?:' . $alternatives . ')' . '(?![\w\x1A])' . '}';

    return preg_replace_callback($pattern,
        array(&$this, '_doAbbreviations_callback'), $text);
}
/**
 * Callback wrapping one abbreviation occurrence in an <abbr> element.
 *
 * @param array $matches [0] is the abbreviation as found in the text.
 * @return mixed Hashed <abbr> element (with a title attribute when a
 *               description exists), or the unchanged match when the
 *               abbreviation is not defined.
 */
function _doAbbreviations_callback($matches) {
    $abbr = $matches[0];
    if (!isset($this->abbr_desciptions[$abbr])) {
        return $matches[0];
    }

    $desc = $this->abbr_desciptions[$abbr];
    # Explicit comparison: empty() would also treat the description "0" as
    # missing.
    if ($desc === '' || $desc === null) {
        return $this->hashPart("<abbr>$abbr</abbr>");
    }

    # ENT_COMPAT also escapes double quotes; the description is placed inside
    # a double-quoted attribute and must not be able to terminate it.
    $desc = htmlspecialchars($desc, ENT_COMPAT);
    return $this->hashPart("<abbr title=\"$desc\">$abbr</abbr>");
}
2004-07-30 04:02:58 +00:00
}
2009-05-26 02:46:09 +00:00
/**
* PHP Markdown Extra
* ==================
*
* Description
* -----------
*
* This is a PHP port of the original Markdown formatter written in Perl
* by John Gruber. This special "Extra" version of PHP Markdown features
* further enhancements to the syntax for making additional constructs
* such as tables and definition lists.
*
* Markdown is a text - to - HTML filter ; it translates an easy - to - read /
* easy - to - write structured text format into HTML . Markdown ' s text format
* is most similar to that of plain text email , and supports features such
* as headers , * emphasis * , code blocks , blockquotes , and links .
*
* Markdown's syntax is designed not as a generic markup language, but
* specifically to serve as a front-end to (X)HTML. You can use span-level
* HTML tags anywhere in a Markdown document, and you can use block-level
* HTML tags (like <div> and <table>) as well.
*
* For more information about Markdown ' s syntax , see :
*
* < http :// daringfireball . net / projects / markdown />
*
*
* Bugs
* ----
*
* To file bug reports please send email to :
*
* < michel . fortin @ michelf . com >
*
* Please include with your report : ( 1 ) the example input ; ( 2 ) the output you
* expected ; ( 3 ) the output Markdown actually produced .
*
*
* Version History
* ---------------
*
* See the readme file for detailed release notes for this version .
*
*
* Copyright and License
* ---------------------
*
* PHP Markdown & Extra
* Copyright ( c ) 2004 - 2007 Michel Fortin
* < http :// www . michelf . com />
* All rights reserved .
*
* Based on Markdown
* Copyright ( c ) 2003 - 2006 John Gruber
* < http :// daringfireball . net />
* All rights reserved .
*
* Redistribution and use in source and binary forms , with or without
* modification , are permitted provided that the following conditions are
* met :
*
* - Redistributions of source code must retain the above copyright notice ,
* this list of conditions and the following disclaimer .
*
* - Redistributions in binary form must reproduce the above copyright
* notice , this list of conditions and the following disclaimer in the
* documentation and / or other materials provided with the distribution .
*
* - Neither the name " Markdown " nor the names of its contributors may
* be used to endorse or promote products derived from this software
* without specific prior written permission .
*
* This software is provided by the copyright holders and contributors " as
* is " and any express or implied warranties, including, but not limited
* to , the implied warranties of merchantability and fitness for a
* particular purpose are disclaimed . In no event shall the copyright owner
* or contributors be liable for any direct , indirect , incidental , special ,
* exemplary , or consequential damages ( including , but not limited to ,
* procurement of substitute goods or services ; loss of use , data , or
* profits ; or business interruption ) however caused and on any theory of
* liability , whether in contract , strict liability , or tort ( including
* negligence or otherwise ) arising in any way out of the use of this
* software , even if advised of the possibility of such damage .
*/
2006-02-25 16:48:46 +00:00
?>