From 5b3300c53006e5b093948ebdd7a8155b7ab29308 Mon Sep 17 00:00:00 2001 From: Ryan Cramer Date: Fri, 9 Jul 2021 12:03:05 -0400 Subject: [PATCH] Upgrade TextformatterMarkdownExtra to use new versions of Parsedown and Parsedown Extra, update module config for new options and add a test option. This update also fixes processwire/processwire-issues#1401 --- .../TextformatterMarkdownExtra.module | 138 +- .../parsedown-extra/ParsedownExtra.php | 262 ++- .../parsedown-extra/README.md | 6 +- .../parsedown/LICENSE.txt | 4 +- .../parsedown/Parsedown.php | 1436 +++++++++++------ .../parsedown/README.md | 91 +- 6 files changed, 1348 insertions(+), 589 deletions(-) diff --git a/wire/modules/Textformatter/TextformatterMarkdownExtra/TextformatterMarkdownExtra.module b/wire/modules/Textformatter/TextformatterMarkdownExtra/TextformatterMarkdownExtra.module index 55f06eee..7d349316 100644 --- a/wire/modules/Textformatter/TextformatterMarkdownExtra/TextformatterMarkdownExtra.module +++ b/wire/modules/Textformatter/TextformatterMarkdownExtra/TextformatterMarkdownExtra.module @@ -3,7 +3,7 @@ /** * ProcessWire Markdown Textformatter * - * ProcessWire 3.x, Copyright 2016 by Ryan Cramer + * ProcessWire 3.x, Copyright 2021 by Ryan Cramer * https://processwire.com * @@ -11,6 +11,7 @@ * Markdown invented by John Gruber. * * @property int $flavor Markdown flavor (see flavor constants) + * @property bool|int $safeMode Whether or not we are in safe mode (https://github.com/erusev/parsedown#security) * */ @@ -19,7 +20,7 @@ class TextformatterMarkdownExtra extends Textformatter implements ConfigurableMo public static function getModuleInfo() { return array( 'title' => 'Markdown/Parsedown Extra', - 'version' => 130, + 'version' => 180, 'summary' => "Markdown/Parsedown extra lightweight markup language by Emanuil Rusev. Based on Markdown by John Gruber.", ); } @@ -31,51 +32,88 @@ class TextformatterMarkdownExtra extends Textformatter implements ConfigurableMo const flavorRCD = 16; // add RCD extensions to one of above, @see markdownExtensions() method (@deprecated) public function __construct() { + parent::__construct(); $this->set('flavor', self::flavorDefault); + $this->set('safeMode', false); } public function format(&$str) { - $str = $this->markdown($str, $this->flavor); + $str = $this->markdown($str, (int) $this->flavor); } public function formatValue(Page $page, Field $field, &$value) { - $value = $this->markdown($value, $this->flavor); + $value = $this->markdown($value, (int) $this->flavor); + } + + /** + * Get or set safe mode + * + * “Parsedown is capable of escaping user-input within the HTML that it generates. + * Additionally Parsedown will apply sanitisation to additional scripting vectors + * (such as scripting link destinations) that are introduced by the markdown syntax + * itself. To tell Parsedown that it is processing untrusted user-input, use the + * following...” See: + * + * @param bool|null $safeMode + * @return bool + * + */ + public function safeMode($safeMode = null) { + if($safeMode !== null) $this->safeMode = $safeMode ? true : false; + return (bool) $this->safeMode; } /** * Given a string, return a version processed with markdown * * @param $str String to process - * @param int $flavor Flavor of markdown (default=parsedown extra) + * @param int|null $flavor Flavor of markdown (null=use module configured setting) + * @param bool|null $safeMode Safe mode ON or OFF (null=use module configured setting) * @return string Processed string * */ - public function markdown($str, $flavor = 0) { - - if(!class_exists("\\Parsedown")) { - require_once(dirname(__FILE__) . "/parsedown/Parsedown.php"); - } - - if($flavor & self::flavorParsedown) { - // regular parsedown - $parsedown = new \Parsedown(); - $str = $parsedown->text($str); - - } else { - // parsedown extra - if(!class_exists("\\ParsedownExtra")) { - require_once(dirname(__FILE__) . "/parsedown-extra/ParsedownExtra.php"); - } - $extra = new \ParsedownExtra(); - $str = $extra->text($str); - } - + public function markdown($str, $flavor = null, $safeMode = null) { + $parsedown = $this->getParsedown($flavor); + if(is_bool($safeMode)) $parsedown->setSafeMode($safeMode); + $str = $parsedown->text($str); if($flavor & self::flavorRCD) $this->markdownExtensions($str); - return $str; } + /** + * Given a string, return a version processed with markdown in safe mode + * + * https://github.com/erusev/parsedown#security + * + * @param $str String to process + * @param int $flavor Flavor of markdown (default=parsedown extra) + * @return string Processed string + * + */ + public function markdownSafe($str, $flavor = 0) { + return $this->markdown($str, $flavor, true); + } + /** + * @param int|null $flavor + * @return \ParsedownExtra|\Parsedown + * + */ + public function getParsedown($flavor = null) { + if($flavor === null) $flavor = (int) $this->flavor; + if(!class_exists("\\Parsedown")) require_once(dirname(__FILE__) . "/parsedown/Parsedown.php"); + if(!class_exists("\\ParsedownExtra")) require_once(dirname(__FILE__) . "/parsedown-extra/ParsedownExtra.php"); + if($flavor & self::flavorParsedown) { + // regular parsedown + $parsedown = new \Parsedown(); + } else { + // parsedown extra + $parsedown = new \ParsedownExtra(); + } + if($this->safeMode) $parsedown->setSafeMode(true); + return $parsedown; + } + /** * A few RCD extentions to MarkDown syntax, to be executed after Markdown has already had it's way with the text * @@ -98,16 +136,52 @@ class TextformatterMarkdownExtra extends Textformatter implements ConfigurableMo if(strpos($str, '/*') !== false) $str = preg_replace('{/\*.*?\*/}s', '', $str); if(strpos($str, '//') !== false) $str = preg_replace('{^//.*$}m', '', $str); } - - public function getModuleConfigInputfields(array $data) { - $inputfields = $this->wire(new InputfieldWrapper()); - $f = $this->wire('modules')->get('InputfieldRadios'); + + /** + * @param InputfieldWrapper $inputfields + * + */ + public function getModuleConfigInputfields(InputfieldWrapper $inputfields) { + $f = $inputfields->InputfieldRadios; $f->attr('name', 'flavor'); $f->label = $this->_('Markdown flavor to use'); $f->addOption(self::flavorParsedownExtra, 'Parsedown Extra'); $f->addOption(self::flavorParsedown, 'Parsedown'); - $f->attr('value', isset($data['flavor']) ? (int) $data['flavor'] : self::flavorDefault); + $f->attr('value', (int) $this->flavor); $inputfields->add($f); - return $inputfields; + + $f = $inputfields->InputfieldToggle; + $f->attr('name', 'safeMode'); + $f->label = $this->_('Safe mode?'); + $f->description = $this->_('Enable this to Markdown that it is processing untrusted user-input.') . ' ' . + "[" . $this->_('Details') . "](https://github.com/erusev/parsedown#security)"; + $f->val((int) $this->safeMode); + $inputfields->add($f); + + $f = $inputfields->InputfieldTextarea; + $f->attr('name', '_test_markdown'); + $f->label = $this->_('Test markdown'); + $f->description = $this->_('Enter some markdown and click submit to test the result with your current settings.'); + $f->collapsed = Inputfield::collapsedBlank; + $f->icon = 'flask'; + $inputfields->add($f); + + $session = $this->wire()->session; + $text = $this->wire()->input->post('_test_markdown'); + if($text) { + $session->setFor($this, 'text', $text); + } else { + $text = $session->getFor($this, 'text'); + $session->removeFor($this, 'text'); + $f->val($text); + if($text) { + $markup = $this->markdown($text); + $this->message("$f->label results:
" . + "
" . $this->wire()->sanitizer->entities($markup) . '
', + Notice::allowMarkup | Notice::noGroup + ); + } + } + } } diff --git a/wire/modules/Textformatter/TextformatterMarkdownExtra/parsedown-extra/ParsedownExtra.php b/wire/modules/Textformatter/TextformatterMarkdownExtra/parsedown-extra/ParsedownExtra.php index b9574549..8cdb5d21 100755 --- a/wire/modules/Textformatter/TextformatterMarkdownExtra/parsedown-extra/ParsedownExtra.php +++ b/wire/modules/Textformatter/TextformatterMarkdownExtra/parsedown-extra/ParsedownExtra.php @@ -1,4 +1,4 @@ -BlockTypes[':'] []= 'DefinitionList'; @@ -43,7 +43,13 @@ class ParsedownExtra extends Parsedown function text($text) { - $markup = parent::text($text); + $Elements = $this->textElements($text); + + # convert to markup + $markup = $this->elements($Elements); + + # trim line breaks + $markup = trim($markup, "\n"); # merge consecutive dl elements @@ -139,25 +145,27 @@ class ParsedownExtra extends Parsedown protected function blockDefinitionList($Line, $Block) { - if ( ! isset($Block) or isset($Block['type'])) + if ( ! isset($Block) or $Block['type'] !== 'Paragraph') { return; } $Element = array( 'name' => 'dl', - 'handler' => 'elements', - 'text' => array(), + 'elements' => array(), ); - $terms = explode("\n", $Block['element']['text']); + $terms = explode("\n", $Block['element']['handler']['argument']); foreach ($terms as $term) { - $Element['text'] []= array( + $Element['elements'] []= array( 'name' => 'dt', - 'handler' => 'line', - 'text' => $term, + 'handler' => array( + 'function' => 'lineElements', + 'argument' => $term, + 'destination' => 'elements' + ), ); } @@ -185,15 +193,17 @@ class ParsedownExtra extends Parsedown if (isset($Block['interrupted'])) { - $Block['dd']['handler'] = 'text'; - $Block['dd']['text'] .= "\n\n"; + $Block['dd']['handler']['function'] = 'textElements'; + $Block['dd']['handler']['argument'] .= "\n\n"; + + $Block['dd']['handler']['destination'] = 'elements'; unset($Block['interrupted']); } $text = substr($Line['body'], min($Line['indent'], 4)); - $Block['dd']['text'] .= "\n" . $text; + $Block['dd']['handler']['argument'] .= "\n" . $text; return $Block; } @@ -206,13 +216,13 @@ class ParsedownExtra extends Parsedown { $Block = parent::blockHeader($Line); - if (preg_match('/[ #]*{('.$this->regexAttribute.'+)}[ ]*$/', $Block['element']['text'], $matches, PREG_OFFSET_CAPTURE)) + if ($Block !== null && preg_match('/[ #]*{('.$this->regexAttribute.'+)}[ ]*$/', $Block['element']['handler']['argument'], $matches, PREG_OFFSET_CAPTURE)) { $attributeString = $matches[1][0]; $Block['element']['attributes'] = $this->parseAttributeData($attributeString); - $Block['element']['text'] = substr($Block['element']['text'], 0, $matches[0][1]); + $Block['element']['handler']['argument'] = substr($Block['element']['handler']['argument'], 0, $matches[0][1]); } return $Block; @@ -221,11 +231,98 @@ class ParsedownExtra extends Parsedown # # Markup + protected function blockMarkup($Line) + { + if ($this->markupEscaped or $this->safeMode) + { + return; + } + + if (preg_match('/^<(\w[\w-]*)(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*(\/)?>/', $Line['text'], $matches)) + { + $element = strtolower($matches[1]); + + if (in_array($element, $this->textLevelElements)) + { + return; + } + + $Block = array( + 'name' => $matches[1], + 'depth' => 0, + 'element' => array( + 'rawHtml' => $Line['text'], + 'autobreak' => true, + ), + ); + + $length = strlen($matches[0]); + $remainder = substr($Line['text'], $length); + + if (trim($remainder) === '') + { + if (isset($matches[2]) or in_array($matches[1], $this->voidElements)) + { + $Block['closed'] = true; + $Block['void'] = true; + } + } + else + { + if (isset($matches[2]) or in_array($matches[1], $this->voidElements)) + { + return; + } + if (preg_match('/<\/'.$matches[1].'>[ ]*$/i', $remainder)) + { + $Block['closed'] = true; + } + } + + return $Block; + } + } + + protected function blockMarkupContinue($Line, array $Block) + { + if (isset($Block['closed'])) + { + return; + } + + if (preg_match('/^<'.$Block['name'].'(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*>/i', $Line['text'])) # open + { + $Block['depth'] ++; + } + + if (preg_match('/(.*?)<\/'.$Block['name'].'>[ ]*$/i', $Line['text'], $matches)) # close + { + if ($Block['depth'] > 0) + { + $Block['depth'] --; + } + else + { + $Block['closed'] = true; + } + } + + if (isset($Block['interrupted'])) + { + $Block['element']['rawHtml'] .= "\n"; + unset($Block['interrupted']); + } + + $Block['element']['rawHtml'] .= "\n".$Line['body']; + + return $Block; + } + protected function blockMarkupComplete($Block) { if ( ! isset($Block['void'])) { - $Block['markup'] = $this->processTag($Block['markup']); + $Block['element']['rawHtml'] = $this->processTag($Block['element']['rawHtml']); } return $Block; @@ -238,13 +335,13 @@ class ParsedownExtra extends Parsedown { $Block = parent::blockSetextHeader($Line, $Block); - if (preg_match('/[ ]*{('.$this->regexAttribute.'+)}[ ]*$/', $Block['element']['text'], $matches, PREG_OFFSET_CAPTURE)) + if ($Block !== null && preg_match('/[ ]*{('.$this->regexAttribute.'+)}[ ]*$/', $Block['element']['handler']['argument'], $matches, PREG_OFFSET_CAPTURE)) { $attributeString = $matches[1][0]; $Block['element']['attributes'] = $this->parseAttributeData($attributeString); - $Block['element']['text'] = substr($Block['element']['text'], 0, $matches[0][1]); + $Block['element']['handler']['argument'] = substr($Block['element']['handler']['argument'], 0, $matches[0][1]); } return $Block; @@ -278,8 +375,7 @@ class ParsedownExtra extends Parsedown $Element = array( 'name' => 'sup', 'attributes' => array('id' => 'fnref'.$this->DefinitionData['Footnote'][$name]['count'].':'.$name), - 'handler' => 'element', - 'text' => array( + 'element' => array( 'name' => 'a', 'attributes' => array('href' => '#fn:'.$name, 'class' => 'footnote-ref'), 'text' => $this->DefinitionData['Footnote'][$name]['number'], @@ -302,7 +398,7 @@ class ParsedownExtra extends Parsedown { $Link = parent::inlineLink($Excerpt); - $remainder = substr($Excerpt['text'], $Link['extent']); + $remainder = $Link !== null ? substr($Excerpt['text'], $Link['extent']) : ''; if (preg_match('/^[ ]*{('.$this->regexAttribute.'+)}/', $remainder, $matches)) { @@ -318,21 +414,52 @@ class ParsedownExtra extends Parsedown # ~ # - protected function unmarkedText($text) + private $currentAbreviation; + private $currentMeaning; + + protected function insertAbreviation(array $Element) { - $text = parent::unmarkedText($text); + if (isset($Element['text'])) + { + $Element['elements'] = self::pregReplaceElements( + '/\b'.preg_quote($this->currentAbreviation, '/').'\b/', + array( + array( + 'name' => 'abbr', + 'attributes' => array( + 'title' => $this->currentMeaning, + ), + 'text' => $this->currentAbreviation, + ) + ), + $Element['text'] + ); + + unset($Element['text']); + } + + return $Element; + } + + protected function inlineText($text) + { + $Inline = parent::inlineText($text); if (isset($this->DefinitionData['Abbreviation'])) { foreach ($this->DefinitionData['Abbreviation'] as $abbreviation => $meaning) { - $pattern = '/\b'.preg_quote($abbreviation).'\b/'; + $this->currentAbreviation = $abbreviation; + $this->currentMeaning = $meaning; - $text = preg_replace($pattern, ''.$abbreviation.'', $text); + $Inline['element'] = $this->elementApplyRecursiveDepthFirst( + array($this, 'insertAbreviation'), + $Inline['element'] + ); } } - return $text; + return $Inline; } # @@ -348,18 +475,21 @@ class ParsedownExtra extends Parsedown $Block['dd'] = array( 'name' => 'dd', - 'handler' => 'line', - 'text' => $text, + 'handler' => array( + 'function' => 'lineElements', + 'argument' => $text, + 'destination' => 'elements' + ), ); if (isset($Block['interrupted'])) { - $Block['dd']['handler'] = 'text'; + $Block['dd']['handler']['function'] = 'textElements'; unset($Block['interrupted']); } - $Block['element']['text'] []= & $Block['dd']; + $Block['element']['elements'] []= & $Block['dd']; return $Block; } @@ -369,15 +499,11 @@ class ParsedownExtra extends Parsedown $Element = array( 'name' => 'div', 'attributes' => array('class' => 'footnotes'), - 'handler' => 'elements', - 'text' => array( - array( - 'name' => 'hr', - ), + 'elements' => array( + array('name' => 'hr'), array( 'name' => 'ol', - 'handler' => 'elements', - 'text' => array(), + 'elements' => array(), ), ), ); @@ -393,34 +519,68 @@ class ParsedownExtra extends Parsedown $text = $DefinitionData['text']; - $text = parent::text($text); + $textElements = parent::textElements($text); $numbers = range(1, $DefinitionData['count']); - $backLinksMarkup = ''; + $backLinkElements = array(); foreach ($numbers as $number) { - $backLinksMarkup .= ' '; + $backLinkElements[] = array('text' => ' '); + $backLinkElements[] = array( + 'name' => 'a', + 'attributes' => array( + 'href' => "#fnref$number:$definitionId", + 'rev' => 'footnote', + 'class' => 'footnote-backref', + ), + 'rawHtml' => '↩', + 'allowRawHtmlInSafeMode' => true, + 'autobreak' => false, + ); } - $backLinksMarkup = substr($backLinksMarkup, 1); + unset($backLinkElements[0]); - if (substr($text, - 4) === '

') + $n = count($textElements) -1; + + if ($textElements[$n]['name'] === 'p') { - $backLinksMarkup = ' '.$backLinksMarkup; + $backLinkElements = array_merge( + array( + array( + 'rawHtml' => ' ', + 'allowRawHtmlInSafeMode' => true, + ), + ), + $backLinkElements + ); - $text = substr_replace($text, $backLinksMarkup.'

', - 4); + unset($textElements[$n]['name']); + + $textElements[$n] = array( + 'name' => 'p', + 'elements' => array_merge( + array($textElements[$n]), + $backLinkElements + ), + ); } else { - $text .= "\n".'

'.$backLinksMarkup.'

'; + $textElements[] = array( + 'name' => 'p', + 'elements' => $backLinkElements + ); } - $Element['text'][1]['text'] []= array( + $Element['elements'][1]['elements'] []= array( 'name' => 'li', 'attributes' => array('id' => 'fn:'.$definitionId), - 'text' => "\n".$text."\n", + 'elements' => array_merge( + $textElements + ), ); } @@ -503,10 +663,10 @@ class ParsedownExtra extends Parsedown } # because we don't want for markup to get encoded - $DOMDocument->documentElement->nodeValue = 'placeholder'; + $DOMDocument->documentElement->nodeValue = 'placeholder\x1A'; $markup = $DOMDocument->saveHTML($DOMDocument->documentElement); - $markup = str_replace('placeholder', $elementText, $markup); + $markup = str_replace('placeholder\x1A', $elementText, $markup); return $markup; } diff --git a/wire/modules/Textformatter/TextformatterMarkdownExtra/parsedown-extra/README.md b/wire/modules/Textformatter/TextformatterMarkdownExtra/parsedown-extra/README.md index ee6ef87f..cee4b543 100755 --- a/wire/modules/Textformatter/TextformatterMarkdownExtra/parsedown-extra/README.md +++ b/wire/modules/Textformatter/TextformatterMarkdownExtra/parsedown-extra/README.md @@ -1,8 +1,12 @@ +> You might also like [Caret](http://caret.io?ref=parsedown) - our Markdown editor for the Desktop. + ## Parsedown Extra +[![Build Status](https://img.shields.io/travis/erusev/parsedown-extra/master.svg?style=flat-square)](https://travis-ci.org/erusev/parsedown-extra) + An extension of [Parsedown](http://parsedown.org) that adds support for [Markdown Extra](https://michelf.ca/projects/php-markdown/extra/). -[[ demo ]](http://parsedown.org/extra/) +[See Demo](http://parsedown.org/extra/) ### Installation diff --git a/wire/modules/Textformatter/TextformatterMarkdownExtra/parsedown/LICENSE.txt b/wire/modules/Textformatter/TextformatterMarkdownExtra/parsedown/LICENSE.txt index baca86f5..8e7c764d 100755 --- a/wire/modules/Textformatter/TextformatterMarkdownExtra/parsedown/LICENSE.txt +++ b/wire/modules/Textformatter/TextformatterMarkdownExtra/parsedown/LICENSE.txt @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2013 Emanuil Rusev, erusev.com +Copyright (c) 2013-2018 Emanuil Rusev, erusev.com Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in @@ -17,4 +17,4 @@ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. \ No newline at end of file +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/wire/modules/Textformatter/TextformatterMarkdownExtra/parsedown/Parsedown.php b/wire/modules/Textformatter/TextformatterMarkdownExtra/parsedown/Parsedown.php index 3a6ee449..ae0cbdec 100755 --- a/wire/modules/Textformatter/TextformatterMarkdownExtra/parsedown/Parsedown.php +++ b/wire/modules/Textformatter/TextformatterMarkdownExtra/parsedown/Parsedown.php @@ -17,11 +17,24 @@ class Parsedown { # ~ - const version = '1.5.1'; + const version = '1.8.0-beta-7'; # ~ function text($text) + { + $Elements = $this->textElements($text); + + # convert to markup + $markup = $this->elements($Elements); + + # trim line breaks + $markup = trim($markup, "\n"); + + return $markup; + } + + protected function textElements($text) { # make sure no definitions are set $this->DefinitionData = array(); @@ -36,12 +49,7 @@ class Parsedown $lines = explode("\n", $text); # iterate through lines to identify blocks - $markup = $this->lines($lines); - - # trim line breaks - $markup = trim($markup, "\n"); - - return $markup; + return $this->linesElements($lines); } # @@ -75,6 +83,42 @@ class Parsedown protected $urlsLinked = true; + function setSafeMode($safeMode) + { + $this->safeMode = (bool) $safeMode; + + return $this; + } + + protected $safeMode; + + function setStrictMode($strictMode) + { + $this->strictMode = (bool) $strictMode; + + return $this; + } + + protected $strictMode; + + protected $safeLinksWhitelist = array( + 'http://', + 'https://', + 'ftp://', + 'ftps://', + 'mailto:', + 'tel:', + 'data:image/png;base64,', + 'data:image/gif;base64,', + 'data:image/jpeg;base64,', + 'irc:', + 'ircs:', + 'git:', + 'ssh:', + 'news:', + 'steam:', + ); + # # Lines # @@ -107,12 +151,6 @@ class Parsedown # ~ - protected $DefinitionTypes = array( - '[' => array('Reference'), - ); - - # ~ - protected $unmarkedBlockTypes = array( 'Code', ); @@ -121,8 +159,14 @@ class Parsedown # Blocks # - private function lines(array $lines) + protected function lines(array $lines) { + return $this->elements($this->linesElements($lines)); + } + + protected function linesElements(array $lines) + { + $Elements = array(); $CurrentBlock = null; foreach ($lines as $line) @@ -131,35 +175,25 @@ class Parsedown { if (isset($CurrentBlock)) { - $CurrentBlock['interrupted'] = true; + $CurrentBlock['interrupted'] = (isset($CurrentBlock['interrupted']) + ? $CurrentBlock['interrupted'] + 1 : 1 + ); } continue; } - if (strpos($line, "\t") !== false) + while (($beforeTab = strstr($line, "\t", true)) !== false) { - $parts = explode("\t", $line); + $shortage = 4 - mb_strlen($beforeTab, 'utf-8') % 4; - $line = $parts[0]; - - unset($parts[0]); - - foreach ($parts as $part) - { - $shortage = 4 - mb_strlen($line, 'utf-8') % 4; - - $line .= str_repeat(' ', $shortage); - $line .= $part; - } + $line = $beforeTab + . str_repeat(' ', $shortage) + . substr($line, strlen($beforeTab) + 1) + ; } - $indent = 0; - - while (isset($line[$indent]) and $line[$indent] === ' ') - { - $indent ++; - } + $indent = strspn($line, ' '); $text = $indent > 0 ? substr($line, $indent) : $line; @@ -169,9 +203,10 @@ class Parsedown # ~ - if (isset($CurrentBlock['incomplete'])) + if (isset($CurrentBlock['continuable'])) { - $Block = $this->{'block'.$CurrentBlock['type'].'Continue'}($Line, $CurrentBlock); + $methodName = 'block' . $CurrentBlock['type'] . 'Continue'; + $Block = $this->$methodName($Line, $CurrentBlock); if (isset($Block)) { @@ -181,12 +216,11 @@ class Parsedown } else { - if (method_exists($this, 'block'.$CurrentBlock['type'].'Complete')) + if ($this->isBlockCompletable($CurrentBlock['type'])) { - $CurrentBlock = $this->{'block'.$CurrentBlock['type'].'Complete'}($CurrentBlock); + $methodName = 'block' . $CurrentBlock['type'] . 'Complete'; + $CurrentBlock = $this->$methodName($CurrentBlock); } - - unset($CurrentBlock['incomplete']); } } @@ -211,7 +245,7 @@ class Parsedown foreach ($blockTypes as $blockType) { - $Block = $this->{'block'.$blockType}($Line, $CurrentBlock); + $Block = $this->{"block$blockType"}($Line, $CurrentBlock); if (isset($Block)) { @@ -219,14 +253,17 @@ class Parsedown if ( ! isset($Block['identified'])) { - $Blocks []= $CurrentBlock; + if (isset($CurrentBlock)) + { + $Elements[] = $this->extractElement($CurrentBlock); + } $Block['identified'] = true; } - if (method_exists($this, 'block'.$blockType.'Continue')) + if ($this->isBlockContinuable($blockType)) { - $Block['incomplete'] = true; + $Block['continuable'] = true; } $CurrentBlock = $Block; @@ -237,13 +274,21 @@ class Parsedown # ~ - if (isset($CurrentBlock) and ! isset($CurrentBlock['type']) and ! isset($CurrentBlock['interrupted'])) + if (isset($CurrentBlock) and $CurrentBlock['type'] === 'Paragraph') { - $CurrentBlock['element']['text'] .= "\n".$text; + $Block = $this->paragraphContinue($Line, $CurrentBlock); + } + + if (isset($Block)) + { + $CurrentBlock = $Block; } else { - $Blocks []= $CurrentBlock; + if (isset($CurrentBlock)) + { + $Elements[] = $this->extractElement($CurrentBlock); + } $CurrentBlock = $this->paragraph($Line); @@ -253,37 +298,49 @@ class Parsedown # ~ - if (isset($CurrentBlock['incomplete']) and method_exists($this, 'block'.$CurrentBlock['type'].'Complete')) + if (isset($CurrentBlock['continuable']) and $this->isBlockCompletable($CurrentBlock['type'])) { - $CurrentBlock = $this->{'block'.$CurrentBlock['type'].'Complete'}($CurrentBlock); + $methodName = 'block' . $CurrentBlock['type'] . 'Complete'; + $CurrentBlock = $this->$methodName($CurrentBlock); } # ~ - $Blocks []= $CurrentBlock; - - unset($Blocks[0]); + if (isset($CurrentBlock)) + { + $Elements[] = $this->extractElement($CurrentBlock); + } # ~ - $markup = ''; + return $Elements; + } - foreach ($Blocks as $Block) + protected function extractElement(array $Component) + { + if ( ! isset($Component['element'])) { - if (isset($Block['hidden'])) + if (isset($Component['markup'])) { - continue; + $Component['element'] = array('rawHtml' => $Component['markup']); + } + elseif (isset($Component['hidden'])) + { + $Component['element'] = array(); } - - $markup .= "\n"; - $markup .= isset($Block['markup']) ? $Block['markup'] : $this->element($Block['element']); } - $markup .= "\n"; + return $Component['element']; + } - # ~ + protected function isBlockContinuable($Type) + { + return method_exists($this, 'block' . $Type . 'Continue'); + } - return $markup; + protected function isBlockCompletable($Type) + { + return method_exists($this, 'block' . $Type . 'Complete'); } # @@ -291,7 +348,7 @@ class Parsedown protected function blockCode($Line, $Block = null) { - if (isset($Block) and ! isset($Block['type']) and ! isset($Block['interrupted'])) + if (isset($Block) and $Block['type'] === 'Paragraph' and ! isset($Block['interrupted'])) { return; } @@ -303,8 +360,7 @@ class Parsedown $Block = array( 'element' => array( 'name' => 'pre', - 'handler' => 'element', - 'text' => array( + 'element' => array( 'name' => 'code', 'text' => $text, ), @@ -321,16 +377,16 @@ class Parsedown { if (isset($Block['interrupted'])) { - $Block['element']['text']['text'] .= "\n"; + $Block['element']['element']['text'] .= str_repeat("\n", $Block['interrupted']); unset($Block['interrupted']); } - $Block['element']['text']['text'] .= "\n"; + $Block['element']['element']['text'] .= "\n"; $text = substr($Line['body'], 4); - $Block['element']['text']['text'] .= $text; + $Block['element']['element']['text'] .= $text; return $Block; } @@ -338,12 +394,6 @@ class Parsedown protected function blockCodeComplete($Block) { - $text = $Block['element']['text']['text']; - - $text = htmlspecialchars($text, ENT_NOQUOTES, 'UTF-8'); - - $Block['element']['text']['text'] = $text; - return $Block; } @@ -352,18 +402,21 @@ class Parsedown protected function blockComment($Line) { - if ($this->markupEscaped) + if ($this->markupEscaped or $this->safeMode) { return; } - if (isset($Line['text'][3]) and $Line['text'][3] === '-' and $Line['text'][2] === '-' and $Line['text'][1] === '!') + if (strpos($Line['text'], '$/', $Line['text'])) + if (strpos($Line['text'], '-->') !== false) { $Block['closed'] = true; } @@ -379,9 +432,9 @@ class Parsedown return; } - $Block['markup'] .= "\n" . $Line['body']; + $Block['element']['rawHtml'] .= "\n" . $Line['body']; - if (preg_match('/-->$/', $Line['text'])) + if (strpos($Line['text'], '-->') !== false) { $Block['closed'] = true; } @@ -394,33 +447,56 @@ class Parsedown protected function blockFencedCode($Line) { - if (preg_match('/^(['.$Line['text'][0].']{3,})[ ]*([\w-]+)?[ ]*$/', $Line['text'], $matches)) + $marker = $Line['text'][0]; + + $openerLength = strspn($Line['text'], $marker); + + if ($openerLength < 3) { - $Element = array( - 'name' => 'code', - 'text' => '', - ); - - if (isset($matches[2])) - { - $class = 'language-'.$matches[2]; - - $Element['attributes'] = array( - 'class' => $class, - ); - } - - $Block = array( - 'char' => $Line['text'][0], - 'element' => array( - 'name' => 'pre', - 'handler' => 'element', - 'text' => $Element, - ), - ); - - return $Block; + return; } + + $infostring = trim(substr($Line['text'], $openerLength), "\t "); + + if (strpos($infostring, '`') !== false) + { + return; + } + + $Element = array( + 'name' => 'code', + 'text' => '', + ); + + if ($infostring !== '') + { + /** + * https://www.w3.org/TR/2011/WD-html5-20110525/elements.html#classes + * Every HTML element may have a class attribute specified. + * The attribute, if specified, must have a value that is a set + * of space-separated tokens representing the various classes + * that the element belongs to. + * [...] + * The space characters, for the purposes of this specification, + * are U+0020 SPACE, U+0009 CHARACTER TABULATION (tab), + * U+000A LINE FEED (LF), U+000C FORM FEED (FF), and + * U+000D CARRIAGE RETURN (CR). + */ + $language = substr($infostring, 0, strcspn($infostring, " \t\n\f\r")); + + $Element['attributes'] = array('class' => "language-$language"); + } + + $Block = array( + 'char' => $marker, + 'openerLength' => $openerLength, + 'element' => array( + 'name' => 'pre', + 'element' => $Element, + ), + ); + + return $Block; } protected function blockFencedCodeContinue($Line, $Block) @@ -432,33 +508,28 @@ class Parsedown if (isset($Block['interrupted'])) { - $Block['element']['text']['text'] .= "\n"; + $Block['element']['element']['text'] .= str_repeat("\n", $Block['interrupted']); unset($Block['interrupted']); } - if (preg_match('/^'.$Block['char'].'{3,}[ ]*$/', $Line['text'])) - { - $Block['element']['text']['text'] = substr($Block['element']['text']['text'], 1); + if (($len = strspn($Line['text'], $Block['char'])) >= $Block['openerLength'] + and chop(substr($Line['text'], $len), ' ') === '' + ) { + $Block['element']['element']['text'] = substr($Block['element']['element']['text'], 1); $Block['complete'] = true; return $Block; } - $Block['element']['text']['text'] .= "\n".$Line['body'];; + $Block['element']['element']['text'] .= "\n" . $Line['body']; return $Block; } protected function blockFencedCodeComplete($Block) { - $text = $Block['element']['text']['text']; - - $text = htmlspecialchars($text, ENT_NOQUOTES, 'UTF-8'); - - $Block['element']['text']['text'] = $text; - return $Block; } @@ -467,61 +538,103 @@ class Parsedown protected function blockHeader($Line) { - if (isset($Line['text'][1])) + $level = strspn($Line['text'], '#'); + + if ($level > 6) { - $level = 1; - - while (isset($Line['text'][$level]) and $Line['text'][$level] === '#') - { - $level ++; - } - - if ($level > 6) - { - return; - } - - $text = trim($Line['text'], '# '); - - $Block = array( - 'element' => array( - 'name' => 'h' . min(6, $level), - 'text' => $text, - 'handler' => 'line', - ), - ); - - return $Block; + return; } + + $text = trim($Line['text'], '#'); + + if ($this->strictMode and isset($text[0]) and $text[0] !== ' ') + { + return; + } + + $text = trim($text, ' '); + + $Block = array( + 'element' => array( + 'name' => 'h' . $level, + 'handler' => array( + 'function' => 'lineElements', + 'argument' => $text, + 'destination' => 'elements', + ) + ), + ); + + return $Block; } # # List - protected function blockList($Line) + protected function blockList($Line, array $CurrentBlock = null) { - list($name, $pattern) = $Line['text'][0] <= '-' ? array('ul', '[*+-]') : array('ol', '[0-9]+[.]'); + list($name, $pattern) = $Line['text'][0] <= '-' ? array('ul', '[*+-]') : array('ol', '[0-9]{1,9}+[.\)]'); - if (preg_match('/^('.$pattern.'[ ]+)(.*)/', $Line['text'], $matches)) + if (preg_match('/^('.$pattern.'([ ]++|$))(.*+)/', $Line['text'], $matches)) { + $contentIndent = strlen($matches[2]); + + if ($contentIndent >= 5) + { + $contentIndent -= 1; + $matches[1] = substr($matches[1], 0, -$contentIndent); + $matches[3] = str_repeat(' ', $contentIndent) . $matches[3]; + } + elseif ($contentIndent === 0) + { + $matches[1] .= ' '; + } + + $markerWithoutWhitespace = strstr($matches[1], ' ', true); + $Block = array( 'indent' => $Line['indent'], 'pattern' => $pattern, + 'data' => array( + 'type' => $name, + 'marker' => $matches[1], + 'markerType' => ($name === 'ul' ? $markerWithoutWhitespace : substr($markerWithoutWhitespace, -1)), + ), 'element' => array( 'name' => $name, - 'handler' => 'elements', + 'elements' => array(), ), ); + $Block['data']['markerTypeRegex'] = preg_quote($Block['data']['markerType'], '/'); + + if ($name === 'ol') + { + $listStart = ltrim(strstr($matches[1], $Block['data']['markerType'], true), '0') ?: '0'; + + if ($listStart !== '1') + { + if ( + isset($CurrentBlock) + and $CurrentBlock['type'] === 'Paragraph' + and ! isset($CurrentBlock['interrupted']) + ) { + return; + } + + $Block['element']['attributes'] = array('start' => $listStart); + } + } $Block['li'] = array( 'name' => 'li', - 'handler' => 'li', - 'text' => array( - $matches[2], - ), + 'handler' => array( + 'function' => 'li', + 'argument' => !empty($matches[3]) ? array($matches[3]) : array(), + 'destination' => 'elements' + ) ); - $Block['element']['text'] []= & $Block['li']; + $Block['element']['elements'] []= & $Block['li']; return $Block; } @@ -529,11 +642,29 @@ class Parsedown protected function blockListContinue($Line, array $Block) { - if ($Block['indent'] === $Line['indent'] and preg_match('/^'.$Block['pattern'].'(?:[ ]+(.*)|$)/', $Line['text'], $matches)) + if (isset($Block['interrupted']) and empty($Block['li']['handler']['argument'])) { + return null; + } + + $requiredIndent = ($Block['indent'] + strlen($Block['data']['marker'])); + + if ($Line['indent'] < $requiredIndent + and ( + ( + $Block['data']['type'] === 'ol' + and preg_match('/^[0-9]++'.$Block['data']['markerTypeRegex'].'(?:[ ]++(.*)|$)/', $Line['text'], $matches) + ) or ( + $Block['data']['type'] === 'ul' + and preg_match('/^'.$Block['data']['markerTypeRegex'].'(?:[ ]++(.*)|$)/', $Line['text'], $matches) + ) + ) + ) { if (isset($Block['interrupted'])) { - $Block['li']['text'] []= ''; + $Block['li']['handler']['argument'] []= ''; + + $Block['loose'] = true; unset($Block['interrupted']); } @@ -542,45 +673,73 @@ class Parsedown $text = isset($matches[1]) ? $matches[1] : ''; + $Block['indent'] = $Line['indent']; + $Block['li'] = array( 'name' => 'li', - 'handler' => 'li', - 'text' => array( - $text, - ), + 'handler' => array( + 'function' => 'li', + 'argument' => array($text), + 'destination' => 'elements' + ) ); - $Block['element']['text'] []= & $Block['li']; + $Block['element']['elements'] []= & $Block['li']; return $Block; } + elseif ($Line['indent'] < $requiredIndent and $this->blockList($Line)) + { + return null; + } if ($Line['text'][0] === '[' and $this->blockReference($Line)) { return $Block; } + if ($Line['indent'] >= $requiredIndent) + { + if (isset($Block['interrupted'])) + { + $Block['li']['handler']['argument'] []= ''; + + $Block['loose'] = true; + + unset($Block['interrupted']); + } + + $text = substr($Line['body'], $requiredIndent); + + $Block['li']['handler']['argument'] []= $text; + + return $Block; + } + if ( ! isset($Block['interrupted'])) { - $text = preg_replace('/^[ ]{0,4}/', '', $Line['body']); + $text = preg_replace('/^[ ]{0,'.$requiredIndent.'}+/', '', $Line['body']); - $Block['li']['text'] []= $text; + $Block['li']['handler']['argument'] []= $text; return $Block; } + } - if ($Line['indent'] > 0) + protected function blockListComplete(array $Block) + { + if (isset($Block['loose'])) { - $Block['li']['text'] []= ''; - - $text = preg_replace('/^[ ]{0,4}/', '', $Line['body']); - - $Block['li']['text'] []= $text; - - unset($Block['interrupted']); - - return $Block; + foreach ($Block['element']['elements'] as &$li) + { + if (end($li['handler']['argument']) !== '') + { + $li['handler']['argument'] []= ''; + } + } } + + return $Block; } # @@ -588,13 +747,16 @@ class Parsedown protected function blockQuote($Line) { - if (preg_match('/^>[ ]?(.*)/', $Line['text'], $matches)) + if (preg_match('/^>[ ]?+(.*+)/', $Line['text'], $matches)) { $Block = array( 'element' => array( 'name' => 'blockquote', - 'handler' => 'lines', - 'text' => (array) $matches[1], + 'handler' => array( + 'function' => 'linesElements', + 'argument' => (array) $matches[1], + 'destination' => 'elements', + ) ), ); @@ -604,23 +766,21 @@ class Parsedown protected function blockQuoteContinue($Line, array $Block) { - if ($Line['text'][0] === '>' and preg_match('/^>[ ]?(.*)/', $Line['text'], $matches)) + if (isset($Block['interrupted'])) { - if (isset($Block['interrupted'])) - { - $Block['element']['text'] []= ''; + return; + } - unset($Block['interrupted']); - } - - $Block['element']['text'] []= $matches[1]; + if ($Line['text'][0] === '>' and preg_match('/^>[ ]?+(.*+)/', $Line['text'], $matches)) + { + $Block['element']['handler']['argument'] []= $matches[1]; return $Block; } if ( ! isset($Block['interrupted'])) { - $Block['element']['text'] []= $Line['text']; + $Block['element']['handler']['argument'] []= $Line['text']; return $Block; } @@ -631,11 +791,13 @@ class Parsedown protected function blockRule($Line) { - if (preg_match('/^(['.$Line['text'][0].'])([ ]*\1){2,}[ ]*$/', $Line['text'])) + $marker = $Line['text'][0]; + + if (substr_count($Line['text'], $marker) >= 3 and chop($Line['text'], " $marker") === '') { $Block = array( 'element' => array( - 'name' => 'hr' + 'name' => 'hr', ), ); @@ -648,12 +810,12 @@ class Parsedown protected function blockSetextHeader($Line, array $Block = null) { - if ( ! isset($Block) or isset($Block['type']) or isset($Block['interrupted'])) + if ( ! isset($Block) or $Block['type'] !== 'Paragraph' or isset($Block['interrupted'])) { return; } - if (chop($Line['text'], $Line['text'][0]) === '') + if ($Line['indent'] < 4 and chop(chop($Line['text'], ' '), $Line['text'][0]) === '') { $Block['element']['name'] = $Line['text'][0] === '=' ? 'h1' : 'h2'; @@ -666,86 +828,40 @@ class Parsedown protected function blockMarkup($Line) { - if ($this->markupEscaped) + if ($this->markupEscaped or $this->safeMode) { return; } - if (preg_match('/^<(\w*)(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*(\/)?>/', $Line['text'], $matches)) + if (preg_match('/^<[\/]?+(\w*)(?:[ ]*+'.$this->regexHtmlAttribute.')*+[ ]*+(\/)?>/', $Line['text'], $matches)) { - if (in_array($matches[1], $this->textLevelElements)) + $element = strtolower($matches[1]); + + if (in_array($element, $this->textLevelElements)) { return; } $Block = array( 'name' => $matches[1], - 'depth' => 0, - 'markup' => $Line['text'], + 'element' => array( + 'rawHtml' => $Line['text'], + 'autobreak' => true, + ), ); - $length = strlen($matches[0]); - - $remainder = substr($Line['text'], $length); - - if (trim($remainder) === '') - { - if (isset($matches[2]) or in_array($matches[1], $this->voidElements)) - { - $Block['closed'] = true; - - $Block['void'] = true; - } - } - else - { - if (isset($matches[2]) or in_array($matches[1], $this->voidElements)) - { - return; - } - - if (preg_match('/<\/'.$matches[1].'>[ ]*$/i', $remainder)) - { - $Block['closed'] = true; - } - } - return $Block; } } protected function blockMarkupContinue($Line, array $Block) { - if (isset($Block['closed'])) + if (isset($Block['closed']) or isset($Block['interrupted'])) { return; } - if (preg_match('/^<'.$Block['name'].'(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*>/i', $Line['text'])) # open - { - $Block['depth'] ++; - } - - if (preg_match('/(.*?)<\/'.$Block['name'].'>[ ]*$/i', $Line['text'], $matches)) # close - { - if ($Block['depth'] > 0) - { - $Block['depth'] --; - } - else - { - $Block['closed'] = true; - } - } - - if (isset($Block['interrupted'])) - { - $Block['markup'] .= "\n"; - - unset($Block['interrupted']); - } - - $Block['markup'] .= "\n".$Line['body']; + $Block['element']['rawHtml'] .= "\n" . $Line['body']; return $Block; } @@ -755,24 +871,20 @@ class Parsedown protected function blockReference($Line) { - if (preg_match('/^\[(.+?)\]:[ ]*?(?:[ ]+["\'(](.+)["\')])?[ ]*$/', $Line['text'], $matches)) - { + if (strpos($Line['text'], ']') !== false + and preg_match('/^\[(.+?)\]:[ ]*+?(?:[ ]+["\'(](.+)["\')])?[ ]*+$/', $Line['text'], $matches) + ) { $id = strtolower($matches[1]); $Data = array( 'url' => $matches[2], - 'title' => null, + 'title' => isset($matches[3]) ? $matches[3] : null, ); - if (isset($matches[3])) - { - $Data['title'] = $matches[3]; - } - $this->DefinitionData['Reference'][$id] = $Data; $Block = array( - 'hidden' => true, + 'element' => array(), ); return $Block; @@ -784,109 +896,125 @@ class Parsedown protected function blockTable($Line, array $Block = null) { - if ( ! isset($Block) or isset($Block['type']) or isset($Block['interrupted'])) + if ( ! isset($Block) or $Block['type'] !== 'Paragraph' or isset($Block['interrupted'])) { return; } - if (strpos($Block['element']['text'], '|') !== false and chop($Line['text'], ' -:|') === '') - { - $alignments = array(); - - $divider = $Line['text']; - - $divider = trim($divider); - $divider = trim($divider, '|'); - - $dividerCells = explode('|', $divider); - - foreach ($dividerCells as $dividerCell) - { - $dividerCell = trim($dividerCell); - - if ($dividerCell === '') - { - continue; - } - - $alignment = null; - - if ($dividerCell[0] === ':') - { - $alignment = 'left'; - } - - if (substr($dividerCell, - 1) === ':') - { - $alignment = $alignment === 'left' ? 'center' : 'right'; - } - - $alignments []= $alignment; - } - - # ~ - - $HeaderElements = array(); - - $header = $Block['element']['text']; - - $header = trim($header); - $header = trim($header, '|'); - - $headerCells = explode('|', $header); - - foreach ($headerCells as $index => $headerCell) - { - $headerCell = trim($headerCell); - - $HeaderElement = array( - 'name' => 'th', - 'text' => $headerCell, - 'handler' => 'line', - ); - - if (isset($alignments[$index])) - { - $alignment = $alignments[$index]; - - $HeaderElement['attributes'] = array( - 'style' => 'text-align: '.$alignment.';', - ); - } - - $HeaderElements []= $HeaderElement; - } - - # ~ - - $Block = array( - 'alignments' => $alignments, - 'identified' => true, - 'element' => array( - 'name' => 'table', - 'handler' => 'elements', - ), - ); - - $Block['element']['text'] []= array( - 'name' => 'thead', - 'handler' => 'elements', - ); - - $Block['element']['text'] []= array( - 'name' => 'tbody', - 'handler' => 'elements', - 'text' => array(), - ); - - $Block['element']['text'][0]['text'] []= array( - 'name' => 'tr', - 'handler' => 'elements', - 'text' => $HeaderElements, - ); - - return $Block; + if ( + strpos($Block['element']['handler']['argument'], '|') === false + and strpos($Line['text'], '|') === false + and strpos($Line['text'], ':') === false + or strpos($Block['element']['handler']['argument'], "\n") !== false + ) { + return; } + + if (chop($Line['text'], ' -:|') !== '') + { + return; + } + + $alignments = array(); + + $divider = $Line['text']; + + $divider = trim($divider); + $divider = trim($divider, '|'); + + $dividerCells = explode('|', $divider); + + foreach ($dividerCells as $dividerCell) + { + $dividerCell = trim($dividerCell); + + if ($dividerCell === '') + { + return; + } + + $alignment = null; + + if ($dividerCell[0] === ':') + { + $alignment = 'left'; + } + + if (substr($dividerCell, - 1) === ':') + { + $alignment = $alignment === 'left' ? 'center' : 'right'; + } + + $alignments []= $alignment; + } + + # ~ + + $HeaderElements = array(); + + $header = $Block['element']['handler']['argument']; + + $header = trim($header); + $header = trim($header, '|'); + + $headerCells = explode('|', $header); + + if (count($headerCells) !== count($alignments)) + { + return; + } + + foreach ($headerCells as $index => $headerCell) + { + $headerCell = trim($headerCell); + + $HeaderElement = array( + 'name' => 'th', + 'handler' => array( + 'function' => 'lineElements', + 'argument' => $headerCell, + 'destination' => 'elements', + ) + ); + + if (isset($alignments[$index])) + { + $alignment = $alignments[$index]; + + $HeaderElement['attributes'] = array( + 'style' => "text-align: $alignment;", + ); + } + + $HeaderElements []= $HeaderElement; + } + + # ~ + + $Block = array( + 'alignments' => $alignments, + 'identified' => true, + 'element' => array( + 'name' => 'table', + 'elements' => array(), + ), + ); + + $Block['element']['elements'] []= array( + 'name' => 'thead', + ); + + $Block['element']['elements'] []= array( + 'name' => 'tbody', + 'elements' => array(), + ); + + $Block['element']['elements'][0]['elements'] []= array( + 'name' => 'tr', + 'elements' => $HeaderElements, + ); + + return $Block; } protected function blockTableContinue($Line, array $Block) @@ -896,7 +1024,7 @@ class Parsedown return; } - if ($Line['text'][0] === '|' or strpos($Line['text'], '|')) + if (count($Block['alignments']) === 1 or $Line['text'][0] === '|' or strpos($Line['text'], '|')) { $Elements = array(); @@ -905,22 +1033,27 @@ class Parsedown $row = trim($row); $row = trim($row, '|'); - preg_match_all('/(?:(\\\\[|])|[^|`]|`[^`]+`|`)+/', $row, $matches); + preg_match_all('/(?:(\\\\[|])|[^|`]|`[^`]++`|`)++/', $row, $matches); - foreach ($matches[0] as $index => $cell) + $cells = array_slice($matches[0], 0, count($Block['alignments'])); + + foreach ($cells as $index => $cell) { $cell = trim($cell); $Element = array( 'name' => 'td', - 'handler' => 'line', - 'text' => $cell, + 'handler' => array( + 'function' => 'lineElements', + 'argument' => $cell, + 'destination' => 'elements', + ) ); if (isset($Block['alignments'][$index])) { $Element['attributes'] = array( - 'style' => 'text-align: '.$Block['alignments'][$index].';', + 'style' => 'text-align: ' . $Block['alignments'][$index] . ';', ); } @@ -929,11 +1062,10 @@ class Parsedown $Element = array( 'name' => 'tr', - 'handler' => 'elements', - 'text' => $Elements, + 'elements' => $Elements, ); - $Block['element']['text'][1]['text'] []= $Element; + $Block['element']['elements'][1]['elements'] []= $Element; return $Block; } @@ -945,13 +1077,27 @@ class Parsedown protected function paragraph($Line) { - $Block = array( + return array( + 'type' => 'Paragraph', 'element' => array( 'name' => 'p', - 'text' => $Line['text'], - 'handler' => 'line', + 'handler' => array( + 'function' => 'lineElements', + 'argument' => $Line['text'], + 'destination' => 'elements', + ), ), ); + } + + protected function paragraphContinue($Line, array $Block) + { + if (isset($Block['interrupted'])) + { + return; + } + + $Block['element']['handler']['argument'] .= "\n".$Line['text']; return $Block; } @@ -961,13 +1107,11 @@ class Parsedown # protected $InlineTypes = array( - '"' => array('SpecialCharacter'), '!' => array('Image'), '&' => array('SpecialCharacter'), '*' => array('Emphasis'), ':' => array('Url'), - '<' => array('UrlTag', 'EmailTag', 'Markup', 'SpecialCharacter'), - '>' => array('SpecialCharacter'), + '<' => array('UrlTag', 'EmailTag', 'Markup'), '[' => array('Link'), '_' => array('Emphasis'), '`' => array('Code'), @@ -977,85 +1121,148 @@ class Parsedown # ~ - protected $inlineMarkerList = '!"*_&[:<>`~\\'; + protected $inlineMarkerList = '!*_&[:<`~\\'; # # ~ # - public function line($text) + public function line($text, $nonNestables = array()) { - $markup = ''; + return $this->elements($this->lineElements($text, $nonNestables)); + } - $unexaminedText = $text; + protected function lineElements($text, $nonNestables = array()) + { + # standardize line breaks + $text = str_replace(array("\r\n", "\r"), "\n", $text); - $markerPosition = 0; + $Elements = array(); - while ($excerpt = strpbrk($unexaminedText, $this->inlineMarkerList)) + $nonNestables = (empty($nonNestables) + ? array() + : array_combine($nonNestables, $nonNestables) + ); + + # $excerpt is based on the first occurrence of a marker + + while ($excerpt = strpbrk($text, $this->inlineMarkerList)) { $marker = $excerpt[0]; - $markerPosition += strpos($unexaminedText, $marker); + $markerPosition = strlen($text) - strlen($excerpt); $Excerpt = array('text' => $excerpt, 'context' => $text); foreach ($this->InlineTypes[$marker] as $inlineType) { - $Inline = $this->{'inline'.$inlineType}($Excerpt); + # check to see if the current inline type is nestable in the current context + + if (isset($nonNestables[$inlineType])) + { + continue; + } + + $Inline = $this->{"inline$inlineType"}($Excerpt); if ( ! isset($Inline)) { continue; } - if (isset($Inline['position']) and $Inline['position'] > $markerPosition) # position is ahead of marker + # makes sure that the inline belongs to "our" marker + + if (isset($Inline['position']) and $Inline['position'] > $markerPosition) { continue; } + # sets a default inline position + if ( ! isset($Inline['position'])) { $Inline['position'] = $markerPosition; } + # cause the new element to 'inherit' our non nestables + + + $Inline['element']['nonNestables'] = isset($Inline['element']['nonNestables']) + ? array_merge($Inline['element']['nonNestables'], $nonNestables) + : $nonNestables + ; + + # the text that comes before the inline $unmarkedText = substr($text, 0, $Inline['position']); - $markup .= $this->unmarkedText($unmarkedText); + # compile the unmarked text + $InlineText = $this->inlineText($unmarkedText); + $Elements[] = $InlineText['element']; - $markup .= isset($Inline['markup']) ? $Inline['markup'] : $this->element($Inline['element']); + # compile the inline + $Elements[] = $this->extractElement($Inline); + # remove the examined text $text = substr($text, $Inline['position'] + $Inline['extent']); - $unexaminedText = $text; - - $markerPosition = 0; - continue 2; } - $unexaminedText = substr($excerpt, 1); + # the marker does not belong to an inline - $markerPosition ++; + $unmarkedText = substr($text, 0, $markerPosition + 1); + + $InlineText = $this->inlineText($unmarkedText); + $Elements[] = $InlineText['element']; + + $text = substr($text, $markerPosition + 1); } - $markup .= $this->unmarkedText($text); + $InlineText = $this->inlineText($text); + $Elements[] = $InlineText['element']; - return $markup; + foreach ($Elements as &$Element) + { + if ( ! isset($Element['autobreak'])) + { + $Element['autobreak'] = false; + } + } + + return $Elements; } # # ~ # + protected function inlineText($text) + { + $Inline = array( + 'extent' => strlen($text), + 'element' => array(), + ); + + $Inline['element']['elements'] = self::pregReplaceElements( + $this->breaksEnabled ? '/[ ]*+\n/' : '/(?:[ ]*+\\\\|[ ]{2,}+)\n/', + array( + array('name' => 'br'), + array('text' => "\n"), + ), + $text + ); + + return $Inline; + } + protected function inlineCode($Excerpt) { $marker = $Excerpt['text'][0]; - if (preg_match('/^('.$marker.'+)[ ]*(.+?)[ ]*(? strlen($matches[0]), @@ -1069,13 +1276,19 @@ class Parsedown protected function inlineEmailTag($Excerpt) { - if (strpos($Excerpt['text'], '>') !== false and preg_match('/^<((mailto:)?\S+?@\S+?)>/i', $Excerpt['text'], $matches)) - { + $hostnameLabel = '[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?'; + + $commonMarkEmail = '[a-zA-Z0-9.!#$%&\'*+\/=?^_`{|}~-]++@' + . $hostnameLabel . '(?:\.' . $hostnameLabel . ')*'; + + if (strpos($Excerpt['text'], '>') !== false + and preg_match("/^<((mailto:)?$commonMarkEmail)>/i", $Excerpt['text'], $matches) + ){ $url = $matches[1]; if ( ! isset($matches[2])) { - $url = 'mailto:' . $url; + $url = "mailto:$url"; } return array( @@ -1117,8 +1330,11 @@ class Parsedown 'extent' => strlen($matches[0]), 'element' => array( 'name' => $emphasis, - 'handler' => 'line', - 'text' => $matches[1], + 'handler' => array( + 'function' => 'lineElements', + 'argument' => $matches[1], + 'destination' => 'elements', + ) ), ); } @@ -1128,7 +1344,7 @@ class Parsedown if (isset($Excerpt['text'][1]) and in_array($Excerpt['text'][1], $this->specialCharacters)) { return array( - 'markup' => $Excerpt['text'][1], + 'element' => array('rawHtml' => $Excerpt['text'][1]), 'extent' => 2, ); } @@ -1156,8 +1372,9 @@ class Parsedown 'name' => 'img', 'attributes' => array( 'src' => $Link['element']['attributes']['href'], - 'alt' => $Link['element']['text'], + 'alt' => $Link['element']['handler']['argument'], ), + 'autobreak' => true, ), ); @@ -1172,8 +1389,12 @@ class Parsedown { $Element = array( 'name' => 'a', - 'handler' => 'line', - 'text' => null, + 'handler' => array( + 'function' => 'lineElements', + 'argument' => null, + 'destination' => 'elements', + ), + 'nonNestables' => array('Url', 'Link'), 'attributes' => array( 'href' => null, 'title' => null, @@ -1184,9 +1405,9 @@ class Parsedown $remainder = $Excerpt['text']; - if (preg_match('/\[((?:[^][]|(?R))*)\]/', $remainder, $matches)) + if (preg_match('/\[((?:[^][]++|(?R))*+)\]/', $remainder, $matches)) { - $Element['text'] = $matches[1]; + $Element['handler']['argument'] = $matches[1]; $extent += strlen($matches[0]); @@ -1197,7 +1418,7 @@ class Parsedown return; } - if (preg_match('/^[(]((?:[^ ()]|[(][^ )]+[)])+)(?:[ ]+("[^"]+"|\'[^\']+\'))?[)]/', $remainder, $matches)) + if (preg_match('/^[(]\s*+((?:[^ ()]++|[(][^ )]+[)])++)(?:[ ]+("[^"]*+"|\'[^\']*+\'))?\s*+[)]/', $remainder, $matches)) { $Element['attributes']['href'] = $matches[1]; @@ -1212,14 +1433,14 @@ class Parsedown { if (preg_match('/^\s*\[(.*?)\]/', $remainder, $matches)) { - $definition = $matches[1] ? $matches[1] : $Element['text']; + $definition = strlen($matches[1]) ? $matches[1] : $Element['handler']['argument']; $definition = strtolower($definition); $extent += strlen($matches[0]); } else { - $definition = strtolower($Element['text']); + $definition = strtolower($Element['handler']['argument']); } if ( ! isset($this->DefinitionData['Reference'][$definition])) @@ -1233,8 +1454,6 @@ class Parsedown $Element['attributes']['title'] = $Definition['title']; } - $Element['attributes']['href'] = str_replace(array('&', '<'), array('&', '<'), $Element['attributes']['href']); - return array( 'extent' => $extent, 'element' => $Element, @@ -1243,31 +1462,31 @@ class Parsedown protected function inlineMarkup($Excerpt) { - if ($this->markupEscaped or strpos($Excerpt['text'], '>') === false) + if ($this->markupEscaped or $this->safeMode or strpos($Excerpt['text'], '>') === false) { return; } - if ($Excerpt['text'][1] === '/' and preg_match('/^<\/\w*[ ]*>/s', $Excerpt['text'], $matches)) + if ($Excerpt['text'][1] === '/' and preg_match('/^<\/\w[\w-]*+[ ]*+>/s', $Excerpt['text'], $matches)) { return array( - 'markup' => $matches[0], + 'element' => array('rawHtml' => $matches[0]), 'extent' => strlen($matches[0]), ); } - if ($Excerpt['text'][1] === '!' and preg_match('/^/s', $Excerpt['text'], $matches)) + if ($Excerpt['text'][1] === '!' and preg_match('/^/s', $Excerpt['text'], $matches)) { return array( - 'markup' => $matches[0], + 'element' => array('rawHtml' => $matches[0]), 'extent' => strlen($matches[0]), ); } - if ($Excerpt['text'][1] !== ' ' and preg_match('/^<\w*(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*\/?>/s', $Excerpt['text'], $matches)) + if ($Excerpt['text'][1] !== ' ' and preg_match('/^<\w[\w-]*+(?:[ ]*+'.$this->regexHtmlAttribute.')*+[ ]*+\/?>/s', $Excerpt['text'], $matches)) { return array( - 'markup' => $matches[0], + 'element' => array('rawHtml' => $matches[0]), 'extent' => strlen($matches[0]), ); } @@ -1275,23 +1494,16 @@ class Parsedown protected function inlineSpecialCharacter($Excerpt) { - if ($Excerpt['text'][0] === '&' and ! preg_match('/^&#?\w+;/', $Excerpt['text'])) - { + if (substr($Excerpt['text'], 1, 1) !== ' ' and strpos($Excerpt['text'], ';') !== false + and preg_match('/^&(#?+[0-9a-zA-Z]++);/', $Excerpt['text'], $matches) + ) { return array( - 'markup' => '&', - 'extent' => 1, + 'element' => array('rawHtml' => '&' . $matches[1] . ';'), + 'extent' => strlen($matches[0]), ); } - $SpecialCharacter = array('>' => 'gt', '<' => 'lt', '"' => 'quot'); - - if (isset($SpecialCharacter[$Excerpt['text'][0]])) - { - return array( - 'markup' => '&'.$SpecialCharacter[$Excerpt['text'][0]].';', - 'extent' => 1, - ); - } + return; } protected function inlineStrikethrough($Excerpt) @@ -1307,8 +1519,11 @@ class Parsedown 'extent' => strlen($matches[0]), 'element' => array( 'name' => 'del', - 'text' => $matches[1], - 'handler' => 'line', + 'handler' => array( + 'function' => 'lineElements', + 'argument' => $matches[1], + 'destination' => 'elements', + ) ), ); } @@ -1321,16 +1536,19 @@ class Parsedown return; } - if (preg_match('/\bhttps?:[\/]{2}[^\s<]+\b\/*/ui', $Excerpt['context'], $matches, PREG_OFFSET_CAPTURE)) - { + if (strpos($Excerpt['context'], 'http') !== false + and preg_match('/\bhttps?+:[\/]{2}[^\s<]+\b\/*+/ui', $Excerpt['context'], $matches, PREG_OFFSET_CAPTURE) + ) { + $url = $matches[0][0]; + $Inline = array( 'extent' => strlen($matches[0][0]), 'position' => $matches[0][1], 'element' => array( 'name' => 'a', - 'text' => $matches[0][0], + 'text' => $url, 'attributes' => array( - 'href' => $matches[0][0], + 'href' => $url, ), ), ); @@ -1341,9 +1559,9 @@ class Parsedown protected function inlineUrlTag($Excerpt) { - if (strpos($Excerpt['text'], '>') !== false and preg_match('/^<(\w+:\/{2}[^ >]+)>/i', $Excerpt['text'], $matches)) + if (strpos($Excerpt['text'], '>') !== false and preg_match('/^<(\w++:\/{2}[^ >]++)>/i', $Excerpt['text'], $matches)) { - $url = str_replace(array('&', '<'), array('&', '<'), $matches[1]); + $url = $matches[1]; return array( 'extent' => strlen($matches[0]), @@ -1362,56 +1580,189 @@ class Parsedown protected function unmarkedText($text) { - if ($this->breaksEnabled) - { - $text = preg_replace('/[ ]*\n/', "
\n", $text); - } - else - { - $text = preg_replace('/(?:[ ][ ]+|[ ]*\\\\)\n/', "
\n", $text); - $text = str_replace(" \n", "\n", $text); - } - - return $text; + $Inline = $this->inlineText($text); + return $this->element($Inline['element']); } # # Handlers # - protected function element(array $Element) + protected function handle(array $Element) { - $markup = '<'.$Element['name']; - - if (isset($Element['attributes'])) + if (isset($Element['handler'])) { - foreach ($Element['attributes'] as $name => $value) + if (!isset($Element['nonNestables'])) { - if ($value === null) - { - continue; - } - - $markup .= ' '.$name.'="'.$value.'"'; + $Element['nonNestables'] = array(); } - } - if (isset($Element['text'])) - { - $markup .= '>'; - - if (isset($Element['handler'])) + if (is_string($Element['handler'])) { - $markup .= $this->{$Element['handler']}($Element['text']); + $function = $Element['handler']; + $argument = $Element['text']; + unset($Element['text']); + $destination = 'rawHtml'; } else { - $markup .= $Element['text']; + $function = $Element['handler']['function']; + $argument = $Element['handler']['argument']; + $destination = $Element['handler']['destination']; } - $markup .= ''; + $Element[$destination] = $this->{$function}($argument, $Element['nonNestables']); + + if ($destination === 'handler') + { + $Element = $this->handle($Element); + } + + unset($Element['handler']); } - else + + return $Element; + } + + protected function handleElementRecursive(array $Element) + { + return $this->elementApplyRecursive(array($this, 'handle'), $Element); + } + + protected function handleElementsRecursive(array $Elements) + { + return $this->elementsApplyRecursive(array($this, 'handle'), $Elements); + } + + protected function elementApplyRecursive($closure, array $Element) + { + $Element = call_user_func($closure, $Element); + + if (isset($Element['elements'])) + { + $Element['elements'] = $this->elementsApplyRecursive($closure, $Element['elements']); + } + elseif (isset($Element['element'])) + { + $Element['element'] = $this->elementApplyRecursive($closure, $Element['element']); + } + + return $Element; + } + + protected function elementApplyRecursiveDepthFirst($closure, array $Element) + { + if (isset($Element['elements'])) + { + $Element['elements'] = $this->elementsApplyRecursiveDepthFirst($closure, $Element['elements']); + } + elseif (isset($Element['element'])) + { + $Element['element'] = $this->elementsApplyRecursiveDepthFirst($closure, $Element['element']); + } + + $Element = call_user_func($closure, $Element); + + return $Element; + } + + protected function elementsApplyRecursive($closure, array $Elements) + { + foreach ($Elements as &$Element) + { + $Element = $this->elementApplyRecursive($closure, $Element); + } + + return $Elements; + } + + protected function elementsApplyRecursiveDepthFirst($closure, array $Elements) + { + foreach ($Elements as &$Element) + { + $Element = $this->elementApplyRecursiveDepthFirst($closure, $Element); + } + + return $Elements; + } + + protected function element(array $Element) + { + if ($this->safeMode) + { + $Element = $this->sanitiseElement($Element); + } + + # identity map if element has no handler + $Element = $this->handle($Element); + + $hasName = isset($Element['name']); + + $markup = ''; + + if ($hasName) + { + $markup .= '<' . $Element['name']; + + if (isset($Element['attributes'])) + { + foreach ($Element['attributes'] as $name => $value) + { + if ($value === null) + { + continue; + } + + $markup .= " $name=\"".self::escape($value).'"'; + } + } + } + + $permitRawHtml = false; + + if (isset($Element['text'])) + { + $text = $Element['text']; + } + // very strongly consider an alternative if you're writing an + // extension + elseif (isset($Element['rawHtml'])) + { + $text = $Element['rawHtml']; + + $allowRawHtmlInSafeMode = isset($Element['allowRawHtmlInSafeMode']) && $Element['allowRawHtmlInSafeMode']; + $permitRawHtml = !$this->safeMode || $allowRawHtmlInSafeMode; + } + + $hasContent = isset($text) || isset($Element['element']) || isset($Element['elements']); + + if ($hasContent) + { + $markup .= $hasName ? '>' : ''; + + if (isset($Element['elements'])) + { + $markup .= $this->elements($Element['elements']); + } + elseif (isset($Element['element'])) + { + $markup .= $this->element($Element['element']); + } + else + { + if (!$permitRawHtml) + { + $markup .= self::escape($text, true); + } + else + { + $markup .= $text; + } + } + + $markup .= $hasName ? '' : ''; + } + elseif ($hasName) { $markup .= ' />'; } @@ -1423,12 +1774,26 @@ class Parsedown { $markup = ''; + $autoBreak = true; + foreach ($Elements as $Element) { - $markup .= "\n" . $this->element($Element); + if (empty($Element)) + { + continue; + } + + $autoBreakNext = (isset($Element['autobreak']) + ? $Element['autobreak'] : isset($Element['name']) + ); + // (autobreak === false) covers both sides of an element + $autoBreak = !$autoBreak ? $autoBreak : $autoBreakNext; + + $markup .= ($autoBreak ? "\n" : '') . $this->element($Element); + $autoBreak = $autoBreakNext; } - $markup .= "\n"; + $markup .= $autoBreak ? "\n" : ''; return $markup; } @@ -1437,21 +1802,49 @@ class Parsedown protected function li($lines) { - $markup = $this->lines($lines); + $Elements = $this->linesElements($lines); - $trimmedMarkup = trim($markup); - - if ( ! in_array('', $lines) and substr($trimmedMarkup, 0, 3) === '

') - { - $markup = $trimmedMarkup; - $markup = substr($markup, 3); - - $position = strpos($markup, "

"); - - $markup = substr_replace($markup, '', $position, 4); + if ( ! in_array('', $lines) + and isset($Elements[0]) and isset($Elements[0]['name']) + and $Elements[0]['name'] === 'p' + ) { + unset($Elements[0]['name']); } - return $markup; + return $Elements; + } + + # + # AST Convenience + # + + /** + * Replace occurrences $regexp with $Elements in $text. Return an array of + * elements representing the replacement. + */ + protected static function pregReplaceElements($regexp, $Elements, $text) + { + $newElements = array(); + + while (preg_match($regexp, $text, $matches, PREG_OFFSET_CAPTURE)) + { + $offset = $matches[0][1]; + $before = substr($text, 0, $offset); + $after = substr($text, $offset + strlen($matches[0][0])); + + $newElements[] = array('text' => $before); + + foreach ($Elements as $Element) + { + $newElements[] = $Element; + } + + $text = $after; + } + + $newElements[] = array('text' => $text); + + return $newElements; } # @@ -1465,10 +1858,83 @@ class Parsedown return $markup; } + protected function sanitiseElement(array $Element) + { + static $goodAttribute = '/^[a-zA-Z0-9][a-zA-Z0-9-_]*+$/'; + static $safeUrlNameToAtt = array( + 'a' => 'href', + 'img' => 'src', + ); + + if ( ! isset($Element['name'])) + { + unset($Element['attributes']); + return $Element; + } + + if (isset($safeUrlNameToAtt[$Element['name']])) + { + $Element = $this->filterUnsafeUrlInAttribute($Element, $safeUrlNameToAtt[$Element['name']]); + } + + if ( ! empty($Element['attributes'])) + { + foreach ($Element['attributes'] as $att => $val) + { + # filter out badly parsed attribute + if ( ! preg_match($goodAttribute, $att)) + { + unset($Element['attributes'][$att]); + } + # dump onevent attribute + elseif (self::striAtStart($att, 'on')) + { + unset($Element['attributes'][$att]); + } + } + } + + return $Element; + } + + protected function filterUnsafeUrlInAttribute(array $Element, $attribute) + { + foreach ($this->safeLinksWhitelist as $scheme) + { + if (self::striAtStart($Element['attributes'][$attribute], $scheme)) + { + return $Element; + } + } + + $Element['attributes'][$attribute] = str_replace(':', '%3A', $Element['attributes'][$attribute]); + + return $Element; + } + # # Static Methods # + protected static function escape($text, $allowQuotes = false) + { + return htmlspecialchars($text, $allowQuotes ? ENT_NOQUOTES : ENT_QUOTES, 'UTF-8'); + } + + protected static function striAtStart($string, $needle) + { + $len = strlen($needle); + + if ($len > strlen($string)) + { + return false; + } + else + { + return strtolower(substr($string, 0, $len)) === strtolower($needle); + } + } + static function instance($name = 'default') { if (isset(self::$instances[$name])) @@ -1476,7 +1942,7 @@ class Parsedown return self::$instances[$name]; } - $instance = new self(); + $instance = new static(); self::$instances[$name] = $instance; @@ -1495,12 +1961,12 @@ class Parsedown # Read-Only protected $specialCharacters = array( - '\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '>', '#', '+', '-', '.', '!', '|', + '\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '>', '#', '+', '-', '.', '!', '|', '~' ); protected $StrongRegex = array( - '*' => '/^[*]{2}((?:\\\\\*|[^*]|[*][^*]*[*])+?)[*]{2}(?![*])/s', - '_' => '/^__((?:\\\\_|[^_]|_[^_]*_)+?)__(?!_)/us', + '*' => '/^[*]{2}((?:\\\\\*|[^*]|[*][^*]*+[*])+?)[*]{2}(?![*])/s', + '_' => '/^__((?:\\\\_|[^_]|_[^_]*+_)+?)__(?!_)/us', ); protected $EmRegex = array( @@ -1508,7 +1974,7 @@ class Parsedown '_' => '/^_((?:\\\\_|[^_]|__[^_]*__)+?)_(?!_)\b/us', ); - protected $regexHtmlAttribute = '[a-zA-Z_:][\w:.-]*(?:\s*=\s*(?:[^"\'=<>`\s]+|"[^"]*"|\'[^\']*\'))?'; + protected $regexHtmlAttribute = '[a-zA-Z_:][\w:.-]*+(?:\s*+=\s*+(?:[^"\'=<>`\s]+|"[^"]*+"|\'[^\']*+\'))?+'; protected $voidElements = array( 'area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', @@ -1519,10 +1985,10 @@ class Parsedown 'b', 'em', 'big', 'cite', 'small', 'spacer', 'listing', 'i', 'rp', 'del', 'code', 'strike', 'marquee', 'q', 'rt', 'ins', 'font', 'strong', - 's', 'tt', 'sub', 'mark', - 'u', 'xm', 'sup', 'nobr', - 'var', 'ruby', - 'wbr', 'span', - 'time', + 's', 'tt', 'kbd', 'mark', + 'u', 'xm', 'sub', 'nobr', + 'sup', 'ruby', + 'var', 'span', + 'wbr', 'time', ); } diff --git a/wire/modules/Textformatter/TextformatterMarkdownExtra/parsedown/README.md b/wire/modules/Textformatter/TextformatterMarkdownExtra/parsedown/README.md index 8d485e7d..a4b8c63a 100755 --- a/wire/modules/Textformatter/TextformatterMarkdownExtra/parsedown/README.md +++ b/wire/modules/Textformatter/TextformatterMarkdownExtra/parsedown/README.md @@ -1,37 +1,88 @@ -## Parsedown + -Better Markdown Parser in PHP +

Parsedown

-[[ demo ]](http://parsedown.org/demo) +

Parsedown

-### Features +[![Build Status](https://travis-ci.org/erusev/parsedown.svg)](https://travis-ci.org/erusev/parsedown) +[![Total Downloads](https://poser.pugx.org/erusev/parsedown/d/total.svg)](https://packagist.org/packages/erusev/parsedown) +[![Version](https://poser.pugx.org/erusev/parsedown/v/stable.svg)](https://packagist.org/packages/erusev/parsedown) +[![License](https://poser.pugx.org/erusev/parsedown/license.svg)](https://packagist.org/packages/erusev/parsedown) -* [Fast](http://parsedown.org/speed) -* [Consistent](http://parsedown.org/consistency) -* [GitHub flavored](https://help.github.com/articles/github-flavored-markdown) -* [Tested](http://parsedown.org/tests/) in PHP 5.2, 5.3, 5.4, 5.5, 5.6 and [hhvm](http://www.hhvm.com/) -* [Extensible](https://github.com/erusev/parsedown/wiki/Writing-Extensions) +Better Markdown Parser in PHP - Demo. + +## Features + +* One File +* No Dependencies +* [Super Fast](http://parsedown.org/speed) +* Extensible +* [GitHub flavored](https://github.github.com/gfm) +* [Tested](http://parsedown.org/tests/) in 5.3 to 7.3 * [Markdown Extra extension](https://github.com/erusev/parsedown-extra) -### Installation +## Installation -Include `Parsedown.php` or install [the composer package](https://packagist.org/packages/erusev/parsedown). +Install the [composer package]: -### Example + composer require erusev/parsedown -``` php +Or download the [latest release] and include `Parsedown.php` + +[composer package]: https://packagist.org/packages/erusev/parsedown "The Parsedown package on packagist.org" +[latest release]: https://github.com/erusev/parsedown/releases/latest "The latest release of Parsedown" + +## Example + +```php $Parsedown = new Parsedown(); echo $Parsedown->text('Hello _Parsedown_!'); # prints:

Hello Parsedown!

``` -More examples in [the wiki](https://github.com/erusev/parsedown/wiki/Usage) and in [this video tutorial](http://youtu.be/wYZBY8DEikI). +You can also parse inline markdown only: -### Questions +```php +echo $Parsedown->line('Hello _Parsedown_!'); # prints: Hello Parsedown! +``` + +More examples in [the wiki](https://github.com/erusev/parsedown/wiki/) and in [this video tutorial](http://youtu.be/wYZBY8DEikI). + +## Security + +Parsedown is capable of escaping user-input within the HTML that it generates. Additionally Parsedown will apply sanitisation to additional scripting vectors (such as scripting link destinations) that are introduced by the markdown syntax itself. + +To tell Parsedown that it is processing untrusted user-input, use the following: + +```php +$Parsedown->setSafeMode(true); +``` + +If instead, you wish to allow HTML within untrusted user-input, but still want output to be free from XSS it is recommended that you make use of a HTML sanitiser that allows HTML tags to be whitelisted, like [HTML Purifier](http://htmlpurifier.org/). + +In both cases you should strongly consider employing defence-in-depth measures, like [deploying a Content-Security-Policy](https://scotthelme.co.uk/content-security-policy-an-introduction/) (a browser security feature) so that your page is likely to be safe even if an attacker finds a vulnerability in one of the first lines of defence above. + +#### Security of Parsedown Extensions + +Safe mode does not necessarily yield safe results when using extensions to Parsedown. Extensions should be evaluated on their own to determine their specific safety against XSS. + +## Escaping HTML + +> **WARNING:** This method isn't safe from XSS! + +If you wish to escape HTML **in trusted input**, you can use the following: + +```php +$Parsedown->setMarkupEscaped(true); +``` + +Beware that this still allows users to insert unsafe scripting vectors, such as links like `[xss](javascript:alert%281%29)`. + +## Questions **How does Parsedown work?** -It tries to read Markdown like a human. First, it looks at the lines. It’s interested in how the lines start. This helps it recognise blocks. It knows, for example, that if a line start with a `-` then it perhaps belong to a list. Once it recognises the blocks, it continues to the content. As it reads, it watches out for special characters. This helps it recognise inline elements (or inlines). +It tries to read Markdown like a human. First, it looks at the lines. It’s interested in how the lines start. This helps it recognise blocks. It knows, for example, that if a line starts with a `-` then perhaps it belongs to a list. Once it recognises the blocks, it continues to the content. As it reads, it watches out for special characters. This helps it recognise inline elements (or inlines). We call this approach "line based". We believe that Parsedown is the first Markdown parser to use it. Since the release of Parsedown, other developers have used the same approach to develop other Markdown parsers in PHP and in other languages. @@ -41,8 +92,12 @@ It passes most of the CommonMark tests. Most of the tests that don't pass deal w **Who uses it?** -[phpDocumentor](http://www.phpdoc.org/), [October CMS](http://octobercms.com/), [Bolt CMS](http://bolt.cm/), [Kirby CMS](http://getkirby.com/), [Grav CMS](http://getgrav.org/), [Statamic CMS](http://www.statamic.com/), [RaspberryPi.org](http://www.raspberrypi.org/) and [more](https://www.versioneye.com/php/erusev:parsedown/references). +[Laravel Framework](https://laravel.com/), [Bolt CMS](http://bolt.cm/), [Grav CMS](http://getgrav.org/), [Herbie CMS](http://www.getherbie.org/), [Kirby CMS](http://getkirby.com/), [October CMS](http://octobercms.com/), [Pico CMS](http://picocms.org), [Statamic CMS](http://www.statamic.com/), [phpDocumentor](http://www.phpdoc.org/), [RaspberryPi.org](http://www.raspberrypi.org/), [Symfony Demo](https://github.com/symfony/demo) and [more](https://packagist.org/packages/erusev/parsedown/dependents). **How can I help?** -Use it, star it, share it and if you feel generous, [donate some money](https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=528P3NZQMP8N2). +Use it, star it, share it and if you feel generous, [donate](https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=528P3NZQMP8N2). + +**What else should I know?** + +I also make [Nota](https://nota.md/) — a writing app designed for Markdown files :)