From 5b3300c53006e5b093948ebdd7a8155b7ab29308 Mon Sep 17 00:00:00 2001
From: Ryan Cramer
" .
+ "" . $this->wire()->sanitizer->entities($markup) . '
',
+ Notice::allowMarkup | Notice::noGroup
+ );
+ }
+ }
+
}
}
diff --git a/wire/modules/Textformatter/TextformatterMarkdownExtra/parsedown-extra/ParsedownExtra.php b/wire/modules/Textformatter/TextformatterMarkdownExtra/parsedown-extra/ParsedownExtra.php
index b9574549..8cdb5d21 100755
--- a/wire/modules/Textformatter/TextformatterMarkdownExtra/parsedown-extra/ParsedownExtra.php
+++ b/wire/modules/Textformatter/TextformatterMarkdownExtra/parsedown-extra/ParsedownExtra.php
@@ -1,4 +1,4 @@
-BlockTypes[':'] []= 'DefinitionList';
@@ -43,7 +43,13 @@ class ParsedownExtra extends Parsedown
function text($text)
{
- $markup = parent::text($text);
+ $Elements = $this->textElements($text);
+
+ # convert to markup
+ $markup = $this->elements($Elements);
+
+ # trim line breaks
+ $markup = trim($markup, "\n");
# merge consecutive dl elements
@@ -139,25 +145,27 @@ class ParsedownExtra extends Parsedown
protected function blockDefinitionList($Line, $Block)
{
- if ( ! isset($Block) or isset($Block['type']))
+ if ( ! isset($Block) or $Block['type'] !== 'Paragraph')
{
return;
}
$Element = array(
'name' => 'dl',
- 'handler' => 'elements',
- 'text' => array(),
+ 'elements' => array(),
);
- $terms = explode("\n", $Block['element']['text']);
+ $terms = explode("\n", $Block['element']['handler']['argument']);
foreach ($terms as $term)
{
- $Element['text'] []= array(
+ $Element['elements'] []= array(
'name' => 'dt',
- 'handler' => 'line',
- 'text' => $term,
+ 'handler' => array(
+ 'function' => 'lineElements',
+ 'argument' => $term,
+ 'destination' => 'elements'
+ ),
);
}
@@ -185,15 +193,17 @@ class ParsedownExtra extends Parsedown
if (isset($Block['interrupted']))
{
- $Block['dd']['handler'] = 'text';
- $Block['dd']['text'] .= "\n\n";
+ $Block['dd']['handler']['function'] = 'textElements';
+ $Block['dd']['handler']['argument'] .= "\n\n";
+
+ $Block['dd']['handler']['destination'] = 'elements';
unset($Block['interrupted']);
}
$text = substr($Line['body'], min($Line['indent'], 4));
- $Block['dd']['text'] .= "\n" . $text;
+ $Block['dd']['handler']['argument'] .= "\n" . $text;
return $Block;
}
@@ -206,13 +216,13 @@ class ParsedownExtra extends Parsedown
{
$Block = parent::blockHeader($Line);
- if (preg_match('/[ #]*{('.$this->regexAttribute.'+)}[ ]*$/', $Block['element']['text'], $matches, PREG_OFFSET_CAPTURE))
+ if ($Block !== null && preg_match('/[ #]*{('.$this->regexAttribute.'+)}[ ]*$/', $Block['element']['handler']['argument'], $matches, PREG_OFFSET_CAPTURE))
{
$attributeString = $matches[1][0];
$Block['element']['attributes'] = $this->parseAttributeData($attributeString);
- $Block['element']['text'] = substr($Block['element']['text'], 0, $matches[0][1]);
+ $Block['element']['handler']['argument'] = substr($Block['element']['handler']['argument'], 0, $matches[0][1]);
}
return $Block;
@@ -221,11 +231,98 @@ class ParsedownExtra extends Parsedown
#
# Markup
+ protected function blockMarkup($Line)
+ {
+ if ($this->markupEscaped or $this->safeMode)
+ {
+ return;
+ }
+
+ if (preg_match('/^<(\w[\w-]*)(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*(\/)?>/', $Line['text'], $matches))
+ {
+ $element = strtolower($matches[1]);
+
+ if (in_array($element, $this->textLevelElements))
+ {
+ return;
+ }
+
+ $Block = array(
+ 'name' => $matches[1],
+ 'depth' => 0,
+ 'element' => array(
+ 'rawHtml' => $Line['text'],
+ 'autobreak' => true,
+ ),
+ );
+
+ $length = strlen($matches[0]);
+ $remainder = substr($Line['text'], $length);
+
+ if (trim($remainder) === '')
+ {
+ if (isset($matches[2]) or in_array($matches[1], $this->voidElements))
+ {
+ $Block['closed'] = true;
+ $Block['void'] = true;
+ }
+ }
+ else
+ {
+ if (isset($matches[2]) or in_array($matches[1], $this->voidElements))
+ {
+ return;
+ }
+ if (preg_match('/<\/'.$matches[1].'>[ ]*$/i', $remainder))
+ {
+ $Block['closed'] = true;
+ }
+ }
+
+ return $Block;
+ }
+ }
+
+ protected function blockMarkupContinue($Line, array $Block)
+ {
+ if (isset($Block['closed']))
+ {
+ return;
+ }
+
+ if (preg_match('/^<'.$Block['name'].'(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*>/i', $Line['text'])) # open
+ {
+ $Block['depth'] ++;
+ }
+
+ if (preg_match('/(.*?)<\/'.$Block['name'].'>[ ]*$/i', $Line['text'], $matches)) # close
+ {
+ if ($Block['depth'] > 0)
+ {
+ $Block['depth'] --;
+ }
+ else
+ {
+ $Block['closed'] = true;
+ }
+ }
+
+ if (isset($Block['interrupted']))
+ {
+ $Block['element']['rawHtml'] .= "\n";
+ unset($Block['interrupted']);
+ }
+
+ $Block['element']['rawHtml'] .= "\n".$Line['body'];
+
+ return $Block;
+ }
+
protected function blockMarkupComplete($Block)
{
if ( ! isset($Block['void']))
{
- $Block['markup'] = $this->processTag($Block['markup']);
+ $Block['element']['rawHtml'] = $this->processTag($Block['element']['rawHtml']);
}
return $Block;
@@ -238,13 +335,13 @@ class ParsedownExtra extends Parsedown
{
$Block = parent::blockSetextHeader($Line, $Block);
- if (preg_match('/[ ]*{('.$this->regexAttribute.'+)}[ ]*$/', $Block['element']['text'], $matches, PREG_OFFSET_CAPTURE))
+ if ($Block !== null && preg_match('/[ ]*{('.$this->regexAttribute.'+)}[ ]*$/', $Block['element']['handler']['argument'], $matches, PREG_OFFSET_CAPTURE))
{
$attributeString = $matches[1][0];
$Block['element']['attributes'] = $this->parseAttributeData($attributeString);
- $Block['element']['text'] = substr($Block['element']['text'], 0, $matches[0][1]);
+ $Block['element']['handler']['argument'] = substr($Block['element']['handler']['argument'], 0, $matches[0][1]);
}
return $Block;
@@ -278,8 +375,7 @@ class ParsedownExtra extends Parsedown
$Element = array(
'name' => 'sup',
'attributes' => array('id' => 'fnref'.$this->DefinitionData['Footnote'][$name]['count'].':'.$name),
- 'handler' => 'element',
- 'text' => array(
+ 'element' => array(
'name' => 'a',
'attributes' => array('href' => '#fn:'.$name, 'class' => 'footnote-ref'),
'text' => $this->DefinitionData['Footnote'][$name]['number'],
@@ -302,7 +398,7 @@ class ParsedownExtra extends Parsedown
{
$Link = parent::inlineLink($Excerpt);
- $remainder = substr($Excerpt['text'], $Link['extent']);
+ $remainder = $Link !== null ? substr($Excerpt['text'], $Link['extent']) : '';
if (preg_match('/^[ ]*{('.$this->regexAttribute.'+)}/', $remainder, $matches))
{
@@ -318,21 +414,52 @@ class ParsedownExtra extends Parsedown
# ~
#
- protected function unmarkedText($text)
+ private $currentAbreviation;
+ private $currentMeaning;
+
+ protected function insertAbreviation(array $Element)
{
- $text = parent::unmarkedText($text);
+ if (isset($Element['text']))
+ {
+ $Element['elements'] = self::pregReplaceElements(
+ '/\b'.preg_quote($this->currentAbreviation, '/').'\b/',
+ array(
+ array(
+ 'name' => 'abbr',
+ 'attributes' => array(
+ 'title' => $this->currentMeaning,
+ ),
+ 'text' => $this->currentAbreviation,
+ )
+ ),
+ $Element['text']
+ );
+
+ unset($Element['text']);
+ }
+
+ return $Element;
+ }
+
+ protected function inlineText($text)
+ {
+ $Inline = parent::inlineText($text);
if (isset($this->DefinitionData['Abbreviation']))
{
foreach ($this->DefinitionData['Abbreviation'] as $abbreviation => $meaning)
{
- $pattern = '/\b'.preg_quote($abbreviation).'\b/';
+ $this->currentAbreviation = $abbreviation;
+ $this->currentMeaning = $meaning;
- $text = preg_replace($pattern, ''.$abbreviation.'', $text);
+ $Inline['element'] = $this->elementApplyRecursiveDepthFirst(
+ array($this, 'insertAbreviation'),
+ $Inline['element']
+ );
}
}
- return $text;
+ return $Inline;
}
#
@@ -348,18 +475,21 @@ class ParsedownExtra extends Parsedown
$Block['dd'] = array(
'name' => 'dd',
- 'handler' => 'line',
- 'text' => $text,
+ 'handler' => array(
+ 'function' => 'lineElements',
+ 'argument' => $text,
+ 'destination' => 'elements'
+ ),
);
if (isset($Block['interrupted']))
{
- $Block['dd']['handler'] = 'text';
+ $Block['dd']['handler']['function'] = 'textElements';
unset($Block['interrupted']);
}
- $Block['element']['text'] []= & $Block['dd'];
+ $Block['element']['elements'] []= & $Block['dd'];
return $Block;
}
@@ -369,15 +499,11 @@ class ParsedownExtra extends Parsedown
$Element = array(
'name' => 'div',
'attributes' => array('class' => 'footnotes'),
- 'handler' => 'elements',
- 'text' => array(
- array(
- 'name' => 'hr',
- ),
+ 'elements' => array(
+ array('name' => 'hr'),
array(
'name' => 'ol',
- 'handler' => 'elements',
- 'text' => array(),
+ 'elements' => array(),
),
),
);
@@ -393,34 +519,68 @@ class ParsedownExtra extends Parsedown
$text = $DefinitionData['text'];
- $text = parent::text($text);
+ $textElements = parent::textElements($text);
$numbers = range(1, $DefinitionData['count']);
- $backLinksMarkup = '';
+ $backLinkElements = array();
foreach ($numbers as $number)
{
- $backLinksMarkup .= ' ↩';
+ $backLinkElements[] = array('text' => ' ');
+ $backLinkElements[] = array(
+ 'name' => 'a',
+ 'attributes' => array(
+ 'href' => "#fnref$number:$definitionId",
+ 'rev' => 'footnote',
+ 'class' => 'footnote-backref',
+ ),
+ 'rawHtml' => '↩',
+ 'allowRawHtmlInSafeMode' => true,
+ 'autobreak' => false,
+ );
}
- $backLinksMarkup = substr($backLinksMarkup, 1);
+ unset($backLinkElements[0]);
- if (substr($text, - 4) === '
'.$backLinksMarkup.'
'; + $textElements[] = array( + 'name' => 'p', + 'elements' => $backLinkElements + ); } - $Element['text'][1]['text'] []= array( + $Element['elements'][1]['elements'] []= array( 'name' => 'li', 'attributes' => array('id' => 'fn:'.$definitionId), - 'text' => "\n".$text."\n", + 'elements' => array_merge( + $textElements + ), ); } @@ -503,10 +663,10 @@ class ParsedownExtra extends Parsedown } # because we don't want for markup to get encoded - $DOMDocument->documentElement->nodeValue = 'placeholder'; + $DOMDocument->documentElement->nodeValue = 'placeholder\x1A'; $markup = $DOMDocument->saveHTML($DOMDocument->documentElement); - $markup = str_replace('placeholder', $elementText, $markup); + $markup = str_replace('placeholder\x1A', $elementText, $markup); return $markup; } diff --git a/wire/modules/Textformatter/TextformatterMarkdownExtra/parsedown-extra/README.md b/wire/modules/Textformatter/TextformatterMarkdownExtra/parsedown-extra/README.md index ee6ef87f..cee4b543 100755 --- a/wire/modules/Textformatter/TextformatterMarkdownExtra/parsedown-extra/README.md +++ b/wire/modules/Textformatter/TextformatterMarkdownExtra/parsedown-extra/README.md @@ -1,8 +1,12 @@ +> You might also like [Caret](http://caret.io?ref=parsedown) - our Markdown editor for the Desktop. + ## Parsedown Extra +[](https://travis-ci.org/erusev/parsedown-extra) + An extension of [Parsedown](http://parsedown.org) that adds support for [Markdown Extra](https://michelf.ca/projects/php-markdown/extra/). -[[ demo ]](http://parsedown.org/extra/) +[See Demo](http://parsedown.org/extra/) ### Installation diff --git a/wire/modules/Textformatter/TextformatterMarkdownExtra/parsedown/LICENSE.txt b/wire/modules/Textformatter/TextformatterMarkdownExtra/parsedown/LICENSE.txt index baca86f5..8e7c764d 100755 --- a/wire/modules/Textformatter/TextformatterMarkdownExtra/parsedown/LICENSE.txt +++ b/wire/modules/Textformatter/TextformatterMarkdownExtra/parsedown/LICENSE.txt @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2013 Emanuil Rusev, erusev.com +Copyright (c) 2013-2018 Emanuil Rusev, erusev.com Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in @@ -17,4 +17,4 @@ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. \ No newline at end of file +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/wire/modules/Textformatter/TextformatterMarkdownExtra/parsedown/Parsedown.php b/wire/modules/Textformatter/TextformatterMarkdownExtra/parsedown/Parsedown.php index 3a6ee449..ae0cbdec 100755 --- a/wire/modules/Textformatter/TextformatterMarkdownExtra/parsedown/Parsedown.php +++ b/wire/modules/Textformatter/TextformatterMarkdownExtra/parsedown/Parsedown.php @@ -17,11 +17,24 @@ class Parsedown { # ~ - const version = '1.5.1'; + const version = '1.8.0-beta-7'; # ~ function text($text) + { + $Elements = $this->textElements($text); + + # convert to markup + $markup = $this->elements($Elements); + + # trim line breaks + $markup = trim($markup, "\n"); + + return $markup; + } + + protected function textElements($text) { # make sure no definitions are set $this->DefinitionData = array(); @@ -36,12 +49,7 @@ class Parsedown $lines = explode("\n", $text); # iterate through lines to identify blocks - $markup = $this->lines($lines); - - # trim line breaks - $markup = trim($markup, "\n"); - - return $markup; + return $this->linesElements($lines); } # @@ -75,6 +83,42 @@ class Parsedown protected $urlsLinked = true; + function setSafeMode($safeMode) + { + $this->safeMode = (bool) $safeMode; + + return $this; + } + + protected $safeMode; + + function setStrictMode($strictMode) + { + $this->strictMode = (bool) $strictMode; + + return $this; + } + + protected $strictMode; + + protected $safeLinksWhitelist = array( + 'http://', + 'https://', + 'ftp://', + 'ftps://', + 'mailto:', + 'tel:', + 'data:image/png;base64,', + 'data:image/gif;base64,', + 'data:image/jpeg;base64,', + 'irc:', + 'ircs:', + 'git:', + 'ssh:', + 'news:', + 'steam:', + ); + # # Lines # @@ -107,12 +151,6 @@ class Parsedown # ~ - protected $DefinitionTypes = array( - '[' => array('Reference'), - ); - - # ~ - protected $unmarkedBlockTypes = array( 'Code', ); @@ -121,8 +159,14 @@ class Parsedown # Blocks # - private function lines(array $lines) + protected function lines(array $lines) { + return $this->elements($this->linesElements($lines)); + } + + protected function linesElements(array $lines) + { + $Elements = array(); $CurrentBlock = null; foreach ($lines as $line) @@ -131,35 +175,25 @@ class Parsedown { if (isset($CurrentBlock)) { - $CurrentBlock['interrupted'] = true; + $CurrentBlock['interrupted'] = (isset($CurrentBlock['interrupted']) + ? $CurrentBlock['interrupted'] + 1 : 1 + ); } continue; } - if (strpos($line, "\t") !== false) + while (($beforeTab = strstr($line, "\t", true)) !== false) { - $parts = explode("\t", $line); + $shortage = 4 - mb_strlen($beforeTab, 'utf-8') % 4; - $line = $parts[0]; - - unset($parts[0]); - - foreach ($parts as $part) - { - $shortage = 4 - mb_strlen($line, 'utf-8') % 4; - - $line .= str_repeat(' ', $shortage); - $line .= $part; - } + $line = $beforeTab + . str_repeat(' ', $shortage) + . substr($line, strlen($beforeTab) + 1) + ; } - $indent = 0; - - while (isset($line[$indent]) and $line[$indent] === ' ') - { - $indent ++; - } + $indent = strspn($line, ' '); $text = $indent > 0 ? substr($line, $indent) : $line; @@ -169,9 +203,10 @@ class Parsedown # ~ - if (isset($CurrentBlock['incomplete'])) + if (isset($CurrentBlock['continuable'])) { - $Block = $this->{'block'.$CurrentBlock['type'].'Continue'}($Line, $CurrentBlock); + $methodName = 'block' . $CurrentBlock['type'] . 'Continue'; + $Block = $this->$methodName($Line, $CurrentBlock); if (isset($Block)) { @@ -181,12 +216,11 @@ class Parsedown } else { - if (method_exists($this, 'block'.$CurrentBlock['type'].'Complete')) + if ($this->isBlockCompletable($CurrentBlock['type'])) { - $CurrentBlock = $this->{'block'.$CurrentBlock['type'].'Complete'}($CurrentBlock); + $methodName = 'block' . $CurrentBlock['type'] . 'Complete'; + $CurrentBlock = $this->$methodName($CurrentBlock); } - - unset($CurrentBlock['incomplete']); } } @@ -211,7 +245,7 @@ class Parsedown foreach ($blockTypes as $blockType) { - $Block = $this->{'block'.$blockType}($Line, $CurrentBlock); + $Block = $this->{"block$blockType"}($Line, $CurrentBlock); if (isset($Block)) { @@ -219,14 +253,17 @@ class Parsedown if ( ! isset($Block['identified'])) { - $Blocks []= $CurrentBlock; + if (isset($CurrentBlock)) + { + $Elements[] = $this->extractElement($CurrentBlock); + } $Block['identified'] = true; } - if (method_exists($this, 'block'.$blockType.'Continue')) + if ($this->isBlockContinuable($blockType)) { - $Block['incomplete'] = true; + $Block['continuable'] = true; } $CurrentBlock = $Block; @@ -237,13 +274,21 @@ class Parsedown # ~ - if (isset($CurrentBlock) and ! isset($CurrentBlock['type']) and ! isset($CurrentBlock['interrupted'])) + if (isset($CurrentBlock) and $CurrentBlock['type'] === 'Paragraph') { - $CurrentBlock['element']['text'] .= "\n".$text; + $Block = $this->paragraphContinue($Line, $CurrentBlock); + } + + if (isset($Block)) + { + $CurrentBlock = $Block; } else { - $Blocks []= $CurrentBlock; + if (isset($CurrentBlock)) + { + $Elements[] = $this->extractElement($CurrentBlock); + } $CurrentBlock = $this->paragraph($Line); @@ -253,37 +298,49 @@ class Parsedown # ~ - if (isset($CurrentBlock['incomplete']) and method_exists($this, 'block'.$CurrentBlock['type'].'Complete')) + if (isset($CurrentBlock['continuable']) and $this->isBlockCompletable($CurrentBlock['type'])) { - $CurrentBlock = $this->{'block'.$CurrentBlock['type'].'Complete'}($CurrentBlock); + $methodName = 'block' . $CurrentBlock['type'] . 'Complete'; + $CurrentBlock = $this->$methodName($CurrentBlock); } # ~ - $Blocks []= $CurrentBlock; - - unset($Blocks[0]); + if (isset($CurrentBlock)) + { + $Elements[] = $this->extractElement($CurrentBlock); + } # ~ - $markup = ''; + return $Elements; + } - foreach ($Blocks as $Block) + protected function extractElement(array $Component) + { + if ( ! isset($Component['element'])) { - if (isset($Block['hidden'])) + if (isset($Component['markup'])) { - continue; + $Component['element'] = array('rawHtml' => $Component['markup']); + } + elseif (isset($Component['hidden'])) + { + $Component['element'] = array(); } - - $markup .= "\n"; - $markup .= isset($Block['markup']) ? $Block['markup'] : $this->element($Block['element']); } - $markup .= "\n"; + return $Component['element']; + } - # ~ + protected function isBlockContinuable($Type) + { + return method_exists($this, 'block' . $Type . 'Continue'); + } - return $markup; + protected function isBlockCompletable($Type) + { + return method_exists($this, 'block' . $Type . 'Complete'); } # @@ -291,7 +348,7 @@ class Parsedown protected function blockCode($Line, $Block = null) { - if (isset($Block) and ! isset($Block['type']) and ! isset($Block['interrupted'])) + if (isset($Block) and $Block['type'] === 'Paragraph' and ! isset($Block['interrupted'])) { return; } @@ -303,8 +360,7 @@ class Parsedown $Block = array( 'element' => array( 'name' => 'pre', - 'handler' => 'element', - 'text' => array( + 'element' => array( 'name' => 'code', 'text' => $text, ), @@ -321,16 +377,16 @@ class Parsedown { if (isset($Block['interrupted'])) { - $Block['element']['text']['text'] .= "\n"; + $Block['element']['element']['text'] .= str_repeat("\n", $Block['interrupted']); unset($Block['interrupted']); } - $Block['element']['text']['text'] .= "\n"; + $Block['element']['element']['text'] .= "\n"; $text = substr($Line['body'], 4); - $Block['element']['text']['text'] .= $text; + $Block['element']['element']['text'] .= $text; return $Block; } @@ -338,12 +394,6 @@ class Parsedown protected function blockCodeComplete($Block) { - $text = $Block['element']['text']['text']; - - $text = htmlspecialchars($text, ENT_NOQUOTES, 'UTF-8'); - - $Block['element']['text']['text'] = $text; - return $Block; } @@ -352,18 +402,21 @@ class Parsedown protected function blockComment($Line) { - if ($this->markupEscaped) + if ($this->markupEscaped or $this->safeMode) { return; } - if (isset($Line['text'][3]) and $Line['text'][3] === '-' and $Line['text'][2] === '-' and $Line['text'][1] === '!') + if (strpos($Line['text'], '$/', $Line['text'])) + if (strpos($Line['text'], '-->') !== false) { $Block['closed'] = true; } @@ -379,9 +432,9 @@ class Parsedown return; } - $Block['markup'] .= "\n" . $Line['body']; + $Block['element']['rawHtml'] .= "\n" . $Line['body']; - if (preg_match('/-->$/', $Line['text'])) + if (strpos($Line['text'], '-->') !== false) { $Block['closed'] = true; } @@ -394,33 +447,56 @@ class Parsedown protected function blockFencedCode($Line) { - if (preg_match('/^(['.$Line['text'][0].']{3,})[ ]*([\w-]+)?[ ]*$/', $Line['text'], $matches)) + $marker = $Line['text'][0]; + + $openerLength = strspn($Line['text'], $marker); + + if ($openerLength < 3) { - $Element = array( - 'name' => 'code', - 'text' => '', - ); - - if (isset($matches[2])) - { - $class = 'language-'.$matches[2]; - - $Element['attributes'] = array( - 'class' => $class, - ); - } - - $Block = array( - 'char' => $Line['text'][0], - 'element' => array( - 'name' => 'pre', - 'handler' => 'element', - 'text' => $Element, - ), - ); - - return $Block; + return; } + + $infostring = trim(substr($Line['text'], $openerLength), "\t "); + + if (strpos($infostring, '`') !== false) + { + return; + } + + $Element = array( + 'name' => 'code', + 'text' => '', + ); + + if ($infostring !== '') + { + /** + * https://www.w3.org/TR/2011/WD-html5-20110525/elements.html#classes + * Every HTML element may have a class attribute specified. + * The attribute, if specified, must have a value that is a set + * of space-separated tokens representing the various classes + * that the element belongs to. + * [...] + * The space characters, for the purposes of this specification, + * are U+0020 SPACE, U+0009 CHARACTER TABULATION (tab), + * U+000A LINE FEED (LF), U+000C FORM FEED (FF), and + * U+000D CARRIAGE RETURN (CR). + */ + $language = substr($infostring, 0, strcspn($infostring, " \t\n\f\r")); + + $Element['attributes'] = array('class' => "language-$language"); + } + + $Block = array( + 'char' => $marker, + 'openerLength' => $openerLength, + 'element' => array( + 'name' => 'pre', + 'element' => $Element, + ), + ); + + return $Block; } protected function blockFencedCodeContinue($Line, $Block) @@ -432,33 +508,28 @@ class Parsedown if (isset($Block['interrupted'])) { - $Block['element']['text']['text'] .= "\n"; + $Block['element']['element']['text'] .= str_repeat("\n", $Block['interrupted']); unset($Block['interrupted']); } - if (preg_match('/^'.$Block['char'].'{3,}[ ]*$/', $Line['text'])) - { - $Block['element']['text']['text'] = substr($Block['element']['text']['text'], 1); + if (($len = strspn($Line['text'], $Block['char'])) >= $Block['openerLength'] + and chop(substr($Line['text'], $len), ' ') === '' + ) { + $Block['element']['element']['text'] = substr($Block['element']['element']['text'], 1); $Block['complete'] = true; return $Block; } - $Block['element']['text']['text'] .= "\n".$Line['body'];; + $Block['element']['element']['text'] .= "\n" . $Line['body']; return $Block; } protected function blockFencedCodeComplete($Block) { - $text = $Block['element']['text']['text']; - - $text = htmlspecialchars($text, ENT_NOQUOTES, 'UTF-8'); - - $Block['element']['text']['text'] = $text; - return $Block; } @@ -467,61 +538,103 @@ class Parsedown protected function blockHeader($Line) { - if (isset($Line['text'][1])) + $level = strspn($Line['text'], '#'); + + if ($level > 6) { - $level = 1; - - while (isset($Line['text'][$level]) and $Line['text'][$level] === '#') - { - $level ++; - } - - if ($level > 6) - { - return; - } - - $text = trim($Line['text'], '# '); - - $Block = array( - 'element' => array( - 'name' => 'h' . min(6, $level), - 'text' => $text, - 'handler' => 'line', - ), - ); - - return $Block; + return; } + + $text = trim($Line['text'], '#'); + + if ($this->strictMode and isset($text[0]) and $text[0] !== ' ') + { + return; + } + + $text = trim($text, ' '); + + $Block = array( + 'element' => array( + 'name' => 'h' . $level, + 'handler' => array( + 'function' => 'lineElements', + 'argument' => $text, + 'destination' => 'elements', + ) + ), + ); + + return $Block; } # # List - protected function blockList($Line) + protected function blockList($Line, array $CurrentBlock = null) { - list($name, $pattern) = $Line['text'][0] <= '-' ? array('ul', '[*+-]') : array('ol', '[0-9]+[.]'); + list($name, $pattern) = $Line['text'][0] <= '-' ? array('ul', '[*+-]') : array('ol', '[0-9]{1,9}+[.\)]'); - if (preg_match('/^('.$pattern.'[ ]+)(.*)/', $Line['text'], $matches)) + if (preg_match('/^('.$pattern.'([ ]++|$))(.*+)/', $Line['text'], $matches)) { + $contentIndent = strlen($matches[2]); + + if ($contentIndent >= 5) + { + $contentIndent -= 1; + $matches[1] = substr($matches[1], 0, -$contentIndent); + $matches[3] = str_repeat(' ', $contentIndent) . $matches[3]; + } + elseif ($contentIndent === 0) + { + $matches[1] .= ' '; + } + + $markerWithoutWhitespace = strstr($matches[1], ' ', true); + $Block = array( 'indent' => $Line['indent'], 'pattern' => $pattern, + 'data' => array( + 'type' => $name, + 'marker' => $matches[1], + 'markerType' => ($name === 'ul' ? $markerWithoutWhitespace : substr($markerWithoutWhitespace, -1)), + ), 'element' => array( 'name' => $name, - 'handler' => 'elements', + 'elements' => array(), ), ); + $Block['data']['markerTypeRegex'] = preg_quote($Block['data']['markerType'], '/'); + + if ($name === 'ol') + { + $listStart = ltrim(strstr($matches[1], $Block['data']['markerType'], true), '0') ?: '0'; + + if ($listStart !== '1') + { + if ( + isset($CurrentBlock) + and $CurrentBlock['type'] === 'Paragraph' + and ! isset($CurrentBlock['interrupted']) + ) { + return; + } + + $Block['element']['attributes'] = array('start' => $listStart); + } + } $Block['li'] = array( 'name' => 'li', - 'handler' => 'li', - 'text' => array( - $matches[2], - ), + 'handler' => array( + 'function' => 'li', + 'argument' => !empty($matches[3]) ? array($matches[3]) : array(), + 'destination' => 'elements' + ) ); - $Block['element']['text'] []= & $Block['li']; + $Block['element']['elements'] []= & $Block['li']; return $Block; } @@ -529,11 +642,29 @@ class Parsedown protected function blockListContinue($Line, array $Block) { - if ($Block['indent'] === $Line['indent'] and preg_match('/^'.$Block['pattern'].'(?:[ ]+(.*)|$)/', $Line['text'], $matches)) + if (isset($Block['interrupted']) and empty($Block['li']['handler']['argument'])) { + return null; + } + + $requiredIndent = ($Block['indent'] + strlen($Block['data']['marker'])); + + if ($Line['indent'] < $requiredIndent + and ( + ( + $Block['data']['type'] === 'ol' + and preg_match('/^[0-9]++'.$Block['data']['markerTypeRegex'].'(?:[ ]++(.*)|$)/', $Line['text'], $matches) + ) or ( + $Block['data']['type'] === 'ul' + and preg_match('/^'.$Block['data']['markerTypeRegex'].'(?:[ ]++(.*)|$)/', $Line['text'], $matches) + ) + ) + ) { if (isset($Block['interrupted'])) { - $Block['li']['text'] []= ''; + $Block['li']['handler']['argument'] []= ''; + + $Block['loose'] = true; unset($Block['interrupted']); } @@ -542,45 +673,73 @@ class Parsedown $text = isset($matches[1]) ? $matches[1] : ''; + $Block['indent'] = $Line['indent']; + $Block['li'] = array( 'name' => 'li', - 'handler' => 'li', - 'text' => array( - $text, - ), + 'handler' => array( + 'function' => 'li', + 'argument' => array($text), + 'destination' => 'elements' + ) ); - $Block['element']['text'] []= & $Block['li']; + $Block['element']['elements'] []= & $Block['li']; return $Block; } + elseif ($Line['indent'] < $requiredIndent and $this->blockList($Line)) + { + return null; + } if ($Line['text'][0] === '[' and $this->blockReference($Line)) { return $Block; } + if ($Line['indent'] >= $requiredIndent) + { + if (isset($Block['interrupted'])) + { + $Block['li']['handler']['argument'] []= ''; + + $Block['loose'] = true; + + unset($Block['interrupted']); + } + + $text = substr($Line['body'], $requiredIndent); + + $Block['li']['handler']['argument'] []= $text; + + return $Block; + } + if ( ! isset($Block['interrupted'])) { - $text = preg_replace('/^[ ]{0,4}/', '', $Line['body']); + $text = preg_replace('/^[ ]{0,'.$requiredIndent.'}+/', '', $Line['body']); - $Block['li']['text'] []= $text; + $Block['li']['handler']['argument'] []= $text; return $Block; } + } - if ($Line['indent'] > 0) + protected function blockListComplete(array $Block) + { + if (isset($Block['loose'])) { - $Block['li']['text'] []= ''; - - $text = preg_replace('/^[ ]{0,4}/', '', $Line['body']); - - $Block['li']['text'] []= $text; - - unset($Block['interrupted']); - - return $Block; + foreach ($Block['element']['elements'] as &$li) + { + if (end($li['handler']['argument']) !== '') + { + $li['handler']['argument'] []= ''; + } + } } + + return $Block; } # @@ -588,13 +747,16 @@ class Parsedown protected function blockQuote($Line) { - if (preg_match('/^>[ ]?(.*)/', $Line['text'], $matches)) + if (preg_match('/^>[ ]?+(.*+)/', $Line['text'], $matches)) { $Block = array( 'element' => array( 'name' => 'blockquote', - 'handler' => 'lines', - 'text' => (array) $matches[1], + 'handler' => array( + 'function' => 'linesElements', + 'argument' => (array) $matches[1], + 'destination' => 'elements', + ) ), ); @@ -604,23 +766,21 @@ class Parsedown protected function blockQuoteContinue($Line, array $Block) { - if ($Line['text'][0] === '>' and preg_match('/^>[ ]?(.*)/', $Line['text'], $matches)) + if (isset($Block['interrupted'])) { - if (isset($Block['interrupted'])) - { - $Block['element']['text'] []= ''; + return; + } - unset($Block['interrupted']); - } - - $Block['element']['text'] []= $matches[1]; + if ($Line['text'][0] === '>' and preg_match('/^>[ ]?+(.*+)/', $Line['text'], $matches)) + { + $Block['element']['handler']['argument'] []= $matches[1]; return $Block; } if ( ! isset($Block['interrupted'])) { - $Block['element']['text'] []= $Line['text']; + $Block['element']['handler']['argument'] []= $Line['text']; return $Block; } @@ -631,11 +791,13 @@ class Parsedown protected function blockRule($Line) { - if (preg_match('/^(['.$Line['text'][0].'])([ ]*\1){2,}[ ]*$/', $Line['text'])) + $marker = $Line['text'][0]; + + if (substr_count($Line['text'], $marker) >= 3 and chop($Line['text'], " $marker") === '') { $Block = array( 'element' => array( - 'name' => 'hr' + 'name' => 'hr', ), ); @@ -648,12 +810,12 @@ class Parsedown protected function blockSetextHeader($Line, array $Block = null) { - if ( ! isset($Block) or isset($Block['type']) or isset($Block['interrupted'])) + if ( ! isset($Block) or $Block['type'] !== 'Paragraph' or isset($Block['interrupted'])) { return; } - if (chop($Line['text'], $Line['text'][0]) === '') + if ($Line['indent'] < 4 and chop(chop($Line['text'], ' '), $Line['text'][0]) === '') { $Block['element']['name'] = $Line['text'][0] === '=' ? 'h1' : 'h2'; @@ -666,86 +828,40 @@ class Parsedown protected function blockMarkup($Line) { - if ($this->markupEscaped) + if ($this->markupEscaped or $this->safeMode) { return; } - if (preg_match('/^<(\w*)(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*(\/)?>/', $Line['text'], $matches)) + if (preg_match('/^<[\/]?+(\w*)(?:[ ]*+'.$this->regexHtmlAttribute.')*+[ ]*+(\/)?>/', $Line['text'], $matches)) { - if (in_array($matches[1], $this->textLevelElements)) + $element = strtolower($matches[1]); + + if (in_array($element, $this->textLevelElements)) { return; } $Block = array( 'name' => $matches[1], - 'depth' => 0, - 'markup' => $Line['text'], + 'element' => array( + 'rawHtml' => $Line['text'], + 'autobreak' => true, + ), ); - $length = strlen($matches[0]); - - $remainder = substr($Line['text'], $length); - - if (trim($remainder) === '') - { - if (isset($matches[2]) or in_array($matches[1], $this->voidElements)) - { - $Block['closed'] = true; - - $Block['void'] = true; - } - } - else - { - if (isset($matches[2]) or in_array($matches[1], $this->voidElements)) - { - return; - } - - if (preg_match('/<\/'.$matches[1].'>[ ]*$/i', $remainder)) - { - $Block['closed'] = true; - } - } - return $Block; } } protected function blockMarkupContinue($Line, array $Block) { - if (isset($Block['closed'])) + if (isset($Block['closed']) or isset($Block['interrupted'])) { return; } - if (preg_match('/^<'.$Block['name'].'(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*>/i', $Line['text'])) # open - { - $Block['depth'] ++; - } - - if (preg_match('/(.*?)<\/'.$Block['name'].'>[ ]*$/i', $Line['text'], $matches)) # close - { - if ($Block['depth'] > 0) - { - $Block['depth'] --; - } - else - { - $Block['closed'] = true; - } - } - - if (isset($Block['interrupted'])) - { - $Block['markup'] .= "\n"; - - unset($Block['interrupted']); - } - - $Block['markup'] .= "\n".$Line['body']; + $Block['element']['rawHtml'] .= "\n" . $Line['body']; return $Block; } @@ -755,24 +871,20 @@ class Parsedown protected function blockReference($Line) { - if (preg_match('/^\[(.+?)\]:[ ]*(\S+?)>?(?:[ ]+["\'(](.+)["\')])?[ ]*$/', $Line['text'], $matches)) - { + if (strpos($Line['text'], ']') !== false + and preg_match('/^\[(.+?)\]:[ ]*+(\S+?)>?(?:[ ]+["\'(](.+)["\')])?[ ]*+$/', $Line['text'], $matches) + ) { $id = strtolower($matches[1]); $Data = array( 'url' => $matches[2], - 'title' => null, + 'title' => isset($matches[3]) ? $matches[3] : null, ); - if (isset($matches[3])) - { - $Data['title'] = $matches[3]; - } - $this->DefinitionData['Reference'][$id] = $Data; $Block = array( - 'hidden' => true, + 'element' => array(), ); return $Block; @@ -784,109 +896,125 @@ class Parsedown protected function blockTable($Line, array $Block = null) { - if ( ! isset($Block) or isset($Block['type']) or isset($Block['interrupted'])) + if ( ! isset($Block) or $Block['type'] !== 'Paragraph' or isset($Block['interrupted'])) { return; } - if (strpos($Block['element']['text'], '|') !== false and chop($Line['text'], ' -:|') === '') - { - $alignments = array(); - - $divider = $Line['text']; - - $divider = trim($divider); - $divider = trim($divider, '|'); - - $dividerCells = explode('|', $divider); - - foreach ($dividerCells as $dividerCell) - { - $dividerCell = trim($dividerCell); - - if ($dividerCell === '') - { - continue; - } - - $alignment = null; - - if ($dividerCell[0] === ':') - { - $alignment = 'left'; - } - - if (substr($dividerCell, - 1) === ':') - { - $alignment = $alignment === 'left' ? 'center' : 'right'; - } - - $alignments []= $alignment; - } - - # ~ - - $HeaderElements = array(); - - $header = $Block['element']['text']; - - $header = trim($header); - $header = trim($header, '|'); - - $headerCells = explode('|', $header); - - foreach ($headerCells as $index => $headerCell) - { - $headerCell = trim($headerCell); - - $HeaderElement = array( - 'name' => 'th', - 'text' => $headerCell, - 'handler' => 'line', - ); - - if (isset($alignments[$index])) - { - $alignment = $alignments[$index]; - - $HeaderElement['attributes'] = array( - 'style' => 'text-align: '.$alignment.';', - ); - } - - $HeaderElements []= $HeaderElement; - } - - # ~ - - $Block = array( - 'alignments' => $alignments, - 'identified' => true, - 'element' => array( - 'name' => 'table', - 'handler' => 'elements', - ), - ); - - $Block['element']['text'] []= array( - 'name' => 'thead', - 'handler' => 'elements', - ); - - $Block['element']['text'] []= array( - 'name' => 'tbody', - 'handler' => 'elements', - 'text' => array(), - ); - - $Block['element']['text'][0]['text'] []= array( - 'name' => 'tr', - 'handler' => 'elements', - 'text' => $HeaderElements, - ); - - return $Block; + if ( + strpos($Block['element']['handler']['argument'], '|') === false + and strpos($Line['text'], '|') === false + and strpos($Line['text'], ':') === false + or strpos($Block['element']['handler']['argument'], "\n") !== false + ) { + return; } + + if (chop($Line['text'], ' -:|') !== '') + { + return; + } + + $alignments = array(); + + $divider = $Line['text']; + + $divider = trim($divider); + $divider = trim($divider, '|'); + + $dividerCells = explode('|', $divider); + + foreach ($dividerCells as $dividerCell) + { + $dividerCell = trim($dividerCell); + + if ($dividerCell === '') + { + return; + } + + $alignment = null; + + if ($dividerCell[0] === ':') + { + $alignment = 'left'; + } + + if (substr($dividerCell, - 1) === ':') + { + $alignment = $alignment === 'left' ? 'center' : 'right'; + } + + $alignments []= $alignment; + } + + # ~ + + $HeaderElements = array(); + + $header = $Block['element']['handler']['argument']; + + $header = trim($header); + $header = trim($header, '|'); + + $headerCells = explode('|', $header); + + if (count($headerCells) !== count($alignments)) + { + return; + } + + foreach ($headerCells as $index => $headerCell) + { + $headerCell = trim($headerCell); + + $HeaderElement = array( + 'name' => 'th', + 'handler' => array( + 'function' => 'lineElements', + 'argument' => $headerCell, + 'destination' => 'elements', + ) + ); + + if (isset($alignments[$index])) + { + $alignment = $alignments[$index]; + + $HeaderElement['attributes'] = array( + 'style' => "text-align: $alignment;", + ); + } + + $HeaderElements []= $HeaderElement; + } + + # ~ + + $Block = array( + 'alignments' => $alignments, + 'identified' => true, + 'element' => array( + 'name' => 'table', + 'elements' => array(), + ), + ); + + $Block['element']['elements'] []= array( + 'name' => 'thead', + ); + + $Block['element']['elements'] []= array( + 'name' => 'tbody', + 'elements' => array(), + ); + + $Block['element']['elements'][0]['elements'] []= array( + 'name' => 'tr', + 'elements' => $HeaderElements, + ); + + return $Block; } protected function blockTableContinue($Line, array $Block) @@ -896,7 +1024,7 @@ class Parsedown return; } - if ($Line['text'][0] === '|' or strpos($Line['text'], '|')) + if (count($Block['alignments']) === 1 or $Line['text'][0] === '|' or strpos($Line['text'], '|')) { $Elements = array(); @@ -905,22 +1033,27 @@ class Parsedown $row = trim($row); $row = trim($row, '|'); - preg_match_all('/(?:(\\\\[|])|[^|`]|`[^`]+`|`)+/', $row, $matches); + preg_match_all('/(?:(\\\\[|])|[^|`]|`[^`]++`|`)++/', $row, $matches); - foreach ($matches[0] as $index => $cell) + $cells = array_slice($matches[0], 0, count($Block['alignments'])); + + foreach ($cells as $index => $cell) { $cell = trim($cell); $Element = array( 'name' => 'td', - 'handler' => 'line', - 'text' => $cell, + 'handler' => array( + 'function' => 'lineElements', + 'argument' => $cell, + 'destination' => 'elements', + ) ); if (isset($Block['alignments'][$index])) { $Element['attributes'] = array( - 'style' => 'text-align: '.$Block['alignments'][$index].';', + 'style' => 'text-align: ' . $Block['alignments'][$index] . ';', ); } @@ -929,11 +1062,10 @@ class Parsedown $Element = array( 'name' => 'tr', - 'handler' => 'elements', - 'text' => $Elements, + 'elements' => $Elements, ); - $Block['element']['text'][1]['text'] []= $Element; + $Block['element']['elements'][1]['elements'] []= $Element; return $Block; } @@ -945,13 +1077,27 @@ class Parsedown protected function paragraph($Line) { - $Block = array( + return array( + 'type' => 'Paragraph', 'element' => array( 'name' => 'p', - 'text' => $Line['text'], - 'handler' => 'line', + 'handler' => array( + 'function' => 'lineElements', + 'argument' => $Line['text'], + 'destination' => 'elements', + ), ), ); + } + + protected function paragraphContinue($Line, array $Block) + { + if (isset($Block['interrupted'])) + { + return; + } + + $Block['element']['handler']['argument'] .= "\n".$Line['text']; return $Block; } @@ -961,13 +1107,11 @@ class Parsedown # protected $InlineTypes = array( - '"' => array('SpecialCharacter'), '!' => array('Image'), '&' => array('SpecialCharacter'), '*' => array('Emphasis'), ':' => array('Url'), - '<' => array('UrlTag', 'EmailTag', 'Markup', 'SpecialCharacter'), - '>' => array('SpecialCharacter'), + '<' => array('UrlTag', 'EmailTag', 'Markup'), '[' => array('Link'), '_' => array('Emphasis'), '`' => array('Code'), @@ -977,85 +1121,148 @@ class Parsedown # ~ - protected $inlineMarkerList = '!"*_&[:<>`~\\'; + protected $inlineMarkerList = '!*_&[:<`~\\'; # # ~ # - public function line($text) + public function line($text, $nonNestables = array()) { - $markup = ''; + return $this->elements($this->lineElements($text, $nonNestables)); + } - $unexaminedText = $text; + protected function lineElements($text, $nonNestables = array()) + { + # standardize line breaks + $text = str_replace(array("\r\n", "\r"), "\n", $text); - $markerPosition = 0; + $Elements = array(); - while ($excerpt = strpbrk($unexaminedText, $this->inlineMarkerList)) + $nonNestables = (empty($nonNestables) + ? array() + : array_combine($nonNestables, $nonNestables) + ); + + # $excerpt is based on the first occurrence of a marker + + while ($excerpt = strpbrk($text, $this->inlineMarkerList)) { $marker = $excerpt[0]; - $markerPosition += strpos($unexaminedText, $marker); + $markerPosition = strlen($text) - strlen($excerpt); $Excerpt = array('text' => $excerpt, 'context' => $text); foreach ($this->InlineTypes[$marker] as $inlineType) { - $Inline = $this->{'inline'.$inlineType}($Excerpt); + # check to see if the current inline type is nestable in the current context + + if (isset($nonNestables[$inlineType])) + { + continue; + } + + $Inline = $this->{"inline$inlineType"}($Excerpt); if ( ! isset($Inline)) { continue; } - if (isset($Inline['position']) and $Inline['position'] > $markerPosition) # position is ahead of marker + # makes sure that the inline belongs to "our" marker + + if (isset($Inline['position']) and $Inline['position'] > $markerPosition) { continue; } + # sets a default inline position + if ( ! isset($Inline['position'])) { $Inline['position'] = $markerPosition; } + # cause the new element to 'inherit' our non nestables + + + $Inline['element']['nonNestables'] = isset($Inline['element']['nonNestables']) + ? array_merge($Inline['element']['nonNestables'], $nonNestables) + : $nonNestables + ; + + # the text that comes before the inline $unmarkedText = substr($text, 0, $Inline['position']); - $markup .= $this->unmarkedText($unmarkedText); + # compile the unmarked text + $InlineText = $this->inlineText($unmarkedText); + $Elements[] = $InlineText['element']; - $markup .= isset($Inline['markup']) ? $Inline['markup'] : $this->element($Inline['element']); + # compile the inline + $Elements[] = $this->extractElement($Inline); + # remove the examined text $text = substr($text, $Inline['position'] + $Inline['extent']); - $unexaminedText = $text; - - $markerPosition = 0; - continue 2; } - $unexaminedText = substr($excerpt, 1); + # the marker does not belong to an inline - $markerPosition ++; + $unmarkedText = substr($text, 0, $markerPosition + 1); + + $InlineText = $this->inlineText($unmarkedText); + $Elements[] = $InlineText['element']; + + $text = substr($text, $markerPosition + 1); } - $markup .= $this->unmarkedText($text); + $InlineText = $this->inlineText($text); + $Elements[] = $InlineText['element']; - return $markup; + foreach ($Elements as &$Element) + { + if ( ! isset($Element['autobreak'])) + { + $Element['autobreak'] = false; + } + } + + return $Elements; } # # ~ # + protected function inlineText($text) + { + $Inline = array( + 'extent' => strlen($text), + 'element' => array(), + ); + + $Inline['element']['elements'] = self::pregReplaceElements( + $this->breaksEnabled ? '/[ ]*+\n/' : '/(?:[ ]*+\\\\|[ ]{2,}+)\n/', + array( + array('name' => 'br'), + array('text' => "\n"), + ), + $text + ); + + return $Inline; + } + protected function inlineCode($Excerpt) { $marker = $Excerpt['text'][0]; - if (preg_match('/^('.$marker.'+)[ ]*(.+?)[ ]*(? strlen($matches[0]), @@ -1069,13 +1276,19 @@ class Parsedown protected function inlineEmailTag($Excerpt) { - if (strpos($Excerpt['text'], '>') !== false and preg_match('/^<((mailto:)?\S+?@\S+?)>/i', $Excerpt['text'], $matches)) - { + $hostnameLabel = '[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?'; + + $commonMarkEmail = '[a-zA-Z0-9.!#$%&\'*+\/=?^_`{|}~-]++@' + . $hostnameLabel . '(?:\.' . $hostnameLabel . ')*'; + + if (strpos($Excerpt['text'], '>') !== false + and preg_match("/^<((mailto:)?$commonMarkEmail)>/i", $Excerpt['text'], $matches) + ){ $url = $matches[1]; if ( ! isset($matches[2])) { - $url = 'mailto:' . $url; + $url = "mailto:$url"; } return array( @@ -1117,8 +1330,11 @@ class Parsedown 'extent' => strlen($matches[0]), 'element' => array( 'name' => $emphasis, - 'handler' => 'line', - 'text' => $matches[1], + 'handler' => array( + 'function' => 'lineElements', + 'argument' => $matches[1], + 'destination' => 'elements', + ) ), ); } @@ -1128,7 +1344,7 @@ class Parsedown if (isset($Excerpt['text'][1]) and in_array($Excerpt['text'][1], $this->specialCharacters)) { return array( - 'markup' => $Excerpt['text'][1], + 'element' => array('rawHtml' => $Excerpt['text'][1]), 'extent' => 2, ); } @@ -1156,8 +1372,9 @@ class Parsedown 'name' => 'img', 'attributes' => array( 'src' => $Link['element']['attributes']['href'], - 'alt' => $Link['element']['text'], + 'alt' => $Link['element']['handler']['argument'], ), + 'autobreak' => true, ), ); @@ -1172,8 +1389,12 @@ class Parsedown { $Element = array( 'name' => 'a', - 'handler' => 'line', - 'text' => null, + 'handler' => array( + 'function' => 'lineElements', + 'argument' => null, + 'destination' => 'elements', + ), + 'nonNestables' => array('Url', 'Link'), 'attributes' => array( 'href' => null, 'title' => null, @@ -1184,9 +1405,9 @@ class Parsedown $remainder = $Excerpt['text']; - if (preg_match('/\[((?:[^][]|(?R))*)\]/', $remainder, $matches)) + if (preg_match('/\[((?:[^][]++|(?R))*+)\]/', $remainder, $matches)) { - $Element['text'] = $matches[1]; + $Element['handler']['argument'] = $matches[1]; $extent += strlen($matches[0]); @@ -1197,7 +1418,7 @@ class Parsedown return; } - if (preg_match('/^[(]((?:[^ ()]|[(][^ )]+[)])+)(?:[ ]+("[^"]+"|\'[^\']+\'))?[)]/', $remainder, $matches)) + if (preg_match('/^[(]\s*+((?:[^ ()]++|[(][^ )]+[)])++)(?:[ ]+("[^"]*+"|\'[^\']*+\'))?\s*+[)]/', $remainder, $matches)) { $Element['attributes']['href'] = $matches[1]; @@ -1212,14 +1433,14 @@ class Parsedown { if (preg_match('/^\s*\[(.*?)\]/', $remainder, $matches)) { - $definition = $matches[1] ? $matches[1] : $Element['text']; + $definition = strlen($matches[1]) ? $matches[1] : $Element['handler']['argument']; $definition = strtolower($definition); $extent += strlen($matches[0]); } else { - $definition = strtolower($Element['text']); + $definition = strtolower($Element['handler']['argument']); } if ( ! isset($this->DefinitionData['Reference'][$definition])) @@ -1233,8 +1454,6 @@ class Parsedown $Element['attributes']['title'] = $Definition['title']; } - $Element['attributes']['href'] = str_replace(array('&', '<'), array('&', '<'), $Element['attributes']['href']); - return array( 'extent' => $extent, 'element' => $Element, @@ -1243,31 +1462,31 @@ class Parsedown protected function inlineMarkup($Excerpt) { - if ($this->markupEscaped or strpos($Excerpt['text'], '>') === false) + if ($this->markupEscaped or $this->safeMode or strpos($Excerpt['text'], '>') === false) { return; } - if ($Excerpt['text'][1] === '/' and preg_match('/^<\/\w*[ ]*>/s', $Excerpt['text'], $matches)) + if ($Excerpt['text'][1] === '/' and preg_match('/^<\/\w[\w-]*+[ ]*+>/s', $Excerpt['text'], $matches)) { return array( - 'markup' => $matches[0], + 'element' => array('rawHtml' => $matches[0]), 'extent' => strlen($matches[0]), ); } - if ($Excerpt['text'][1] === '!' and preg_match('/^/s', $Excerpt['text'], $matches)) + if ($Excerpt['text'][1] === '!' and preg_match('/^/s', $Excerpt['text'], $matches)) { return array( - 'markup' => $matches[0], + 'element' => array('rawHtml' => $matches[0]), 'extent' => strlen($matches[0]), ); } - if ($Excerpt['text'][1] !== ' ' and preg_match('/^<\w*(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*\/?>/s', $Excerpt['text'], $matches)) + if ($Excerpt['text'][1] !== ' ' and preg_match('/^<\w[\w-]*+(?:[ ]*+'.$this->regexHtmlAttribute.')*+[ ]*+\/?>/s', $Excerpt['text'], $matches)) { return array( - 'markup' => $matches[0], + 'element' => array('rawHtml' => $matches[0]), 'extent' => strlen($matches[0]), ); } @@ -1275,23 +1494,16 @@ class Parsedown protected function inlineSpecialCharacter($Excerpt) { - if ($Excerpt['text'][0] === '&' and ! preg_match('/^?\w+;/', $Excerpt['text'])) - { + if (substr($Excerpt['text'], 1, 1) !== ' ' and strpos($Excerpt['text'], ';') !== false + and preg_match('/^&(#?+[0-9a-zA-Z]++);/', $Excerpt['text'], $matches) + ) { return array( - 'markup' => '&', - 'extent' => 1, + 'element' => array('rawHtml' => '&' . $matches[1] . ';'), + 'extent' => strlen($matches[0]), ); } - $SpecialCharacter = array('>' => 'gt', '<' => 'lt', '"' => 'quot'); - - if (isset($SpecialCharacter[$Excerpt['text'][0]])) - { - return array( - 'markup' => '&'.$SpecialCharacter[$Excerpt['text'][0]].';', - 'extent' => 1, - ); - } + return; } protected function inlineStrikethrough($Excerpt) @@ -1307,8 +1519,11 @@ class Parsedown 'extent' => strlen($matches[0]), 'element' => array( 'name' => 'del', - 'text' => $matches[1], - 'handler' => 'line', + 'handler' => array( + 'function' => 'lineElements', + 'argument' => $matches[1], + 'destination' => 'elements', + ) ), ); } @@ -1321,16 +1536,19 @@ class Parsedown return; } - if (preg_match('/\bhttps?:[\/]{2}[^\s<]+\b\/*/ui', $Excerpt['context'], $matches, PREG_OFFSET_CAPTURE)) - { + if (strpos($Excerpt['context'], 'http') !== false + and preg_match('/\bhttps?+:[\/]{2}[^\s<]+\b\/*+/ui', $Excerpt['context'], $matches, PREG_OFFSET_CAPTURE) + ) { + $url = $matches[0][0]; + $Inline = array( 'extent' => strlen($matches[0][0]), 'position' => $matches[0][1], 'element' => array( 'name' => 'a', - 'text' => $matches[0][0], + 'text' => $url, 'attributes' => array( - 'href' => $matches[0][0], + 'href' => $url, ), ), ); @@ -1341,9 +1559,9 @@ class Parsedown protected function inlineUrlTag($Excerpt) { - if (strpos($Excerpt['text'], '>') !== false and preg_match('/^<(\w+:\/{2}[^ >]+)>/i', $Excerpt['text'], $matches)) + if (strpos($Excerpt['text'], '>') !== false and preg_match('/^<(\w++:\/{2}[^ >]++)>/i', $Excerpt['text'], $matches)) { - $url = str_replace(array('&', '<'), array('&', '<'), $matches[1]); + $url = $matches[1]; return array( 'extent' => strlen($matches[0]), @@ -1362,56 +1580,189 @@ class Parsedown protected function unmarkedText($text) { - if ($this->breaksEnabled) - { - $text = preg_replace('/[ ]*\n/', "') - { - $markup = $trimmedMarkup; - $markup = substr($markup, 3); - - $position = strpos($markup, "
"); - - $markup = substr_replace($markup, '', $position, 4); + if ( ! in_array('', $lines) + and isset($Elements[0]) and isset($Elements[0]['name']) + and $Elements[0]['name'] === 'p' + ) { + unset($Elements[0]['name']); } - return $markup; + return $Elements; + } + + # + # AST Convenience + # + + /** + * Replace occurrences $regexp with $Elements in $text. Return an array of + * elements representing the replacement. + */ + protected static function pregReplaceElements($regexp, $Elements, $text) + { + $newElements = array(); + + while (preg_match($regexp, $text, $matches, PREG_OFFSET_CAPTURE)) + { + $offset = $matches[0][1]; + $before = substr($text, 0, $offset); + $after = substr($text, $offset + strlen($matches[0][0])); + + $newElements[] = array('text' => $before); + + foreach ($Elements as $Element) + { + $newElements[] = $Element; + } + + $text = $after; + } + + $newElements[] = array('text' => $text); + + return $newElements; } # @@ -1465,10 +1858,83 @@ class Parsedown return $markup; } + protected function sanitiseElement(array $Element) + { + static $goodAttribute = '/^[a-zA-Z0-9][a-zA-Z0-9-_]*+$/'; + static $safeUrlNameToAtt = array( + 'a' => 'href', + 'img' => 'src', + ); + + if ( ! isset($Element['name'])) + { + unset($Element['attributes']); + return $Element; + } + + if (isset($safeUrlNameToAtt[$Element['name']])) + { + $Element = $this->filterUnsafeUrlInAttribute($Element, $safeUrlNameToAtt[$Element['name']]); + } + + if ( ! empty($Element['attributes'])) + { + foreach ($Element['attributes'] as $att => $val) + { + # filter out badly parsed attribute + if ( ! preg_match($goodAttribute, $att)) + { + unset($Element['attributes'][$att]); + } + # dump onevent attribute + elseif (self::striAtStart($att, 'on')) + { + unset($Element['attributes'][$att]); + } + } + } + + return $Element; + } + + protected function filterUnsafeUrlInAttribute(array $Element, $attribute) + { + foreach ($this->safeLinksWhitelist as $scheme) + { + if (self::striAtStart($Element['attributes'][$attribute], $scheme)) + { + return $Element; + } + } + + $Element['attributes'][$attribute] = str_replace(':', '%3A', $Element['attributes'][$attribute]); + + return $Element; + } + # # Static Methods # + protected static function escape($text, $allowQuotes = false) + { + return htmlspecialchars($text, $allowQuotes ? ENT_NOQUOTES : ENT_QUOTES, 'UTF-8'); + } + + protected static function striAtStart($string, $needle) + { + $len = strlen($needle); + + if ($len > strlen($string)) + { + return false; + } + else + { + return strtolower(substr($string, 0, $len)) === strtolower($needle); + } + } + static function instance($name = 'default') { if (isset(self::$instances[$name])) @@ -1476,7 +1942,7 @@ class Parsedown return self::$instances[$name]; } - $instance = new self(); + $instance = new static(); self::$instances[$name] = $instance; @@ -1495,12 +1961,12 @@ class Parsedown # Read-Only protected $specialCharacters = array( - '\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '>', '#', '+', '-', '.', '!', '|', + '\\', '`', '*', '_', '{', '}', '[', ']', '(', ')', '>', '#', '+', '-', '.', '!', '|', '~' ); protected $StrongRegex = array( - '*' => '/^[*]{2}((?:\\\\\*|[^*]|[*][^*]*[*])+?)[*]{2}(?![*])/s', - '_' => '/^__((?:\\\\_|[^_]|_[^_]*_)+?)__(?!_)/us', + '*' => '/^[*]{2}((?:\\\\\*|[^*]|[*][^*]*+[*])+?)[*]{2}(?![*])/s', + '_' => '/^__((?:\\\\_|[^_]|_[^_]*+_)+?)__(?!_)/us', ); protected $EmRegex = array( @@ -1508,7 +1974,7 @@ class Parsedown '_' => '/^_((?:\\\\_|[^_]|__[^_]*__)+?)_(?!_)\b/us', ); - protected $regexHtmlAttribute = '[a-zA-Z_:][\w:.-]*(?:\s*=\s*(?:[^"\'=<>`\s]+|"[^"]*"|\'[^\']*\'))?'; + protected $regexHtmlAttribute = '[a-zA-Z_:][\w:.-]*+(?:\s*+=\s*+(?:[^"\'=<>`\s]+|"[^"]*+"|\'[^\']*+\'))?+'; protected $voidElements = array( 'area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', @@ -1519,10 +1985,10 @@ class Parsedown 'b', 'em', 'big', 'cite', 'small', 'spacer', 'listing', 'i', 'rp', 'del', 'code', 'strike', 'marquee', 'q', 'rt', 'ins', 'font', 'strong', - 's', 'tt', 'sub', 'mark', - 'u', 'xm', 'sup', 'nobr', - 'var', 'ruby', - 'wbr', 'span', - 'time', + 's', 'tt', 'kbd', 'mark', + 'u', 'xm', 'sub', 'nobr', + 'sup', 'ruby', + 'var', 'span', + 'wbr', 'time', ); } diff --git a/wire/modules/Textformatter/TextformatterMarkdownExtra/parsedown/README.md b/wire/modules/Textformatter/TextformatterMarkdownExtra/parsedown/README.md index 8d485e7d..a4b8c63a 100755 --- a/wire/modules/Textformatter/TextformatterMarkdownExtra/parsedown/README.md +++ b/wire/modules/Textformatter/TextformatterMarkdownExtra/parsedown/README.md @@ -1,37 +1,88 @@ -## Parsedown + -Better Markdown Parser in PHP +Hello Parsedown!
``` -More examples in [the wiki](https://github.com/erusev/parsedown/wiki/Usage) and in [this video tutorial](http://youtu.be/wYZBY8DEikI). +You can also parse inline markdown only: -### Questions +```php +echo $Parsedown->line('Hello _Parsedown_!'); # prints: Hello Parsedown! +``` + +More examples in [the wiki](https://github.com/erusev/parsedown/wiki/) and in [this video tutorial](http://youtu.be/wYZBY8DEikI). + +## Security + +Parsedown is capable of escaping user-input within the HTML that it generates. Additionally Parsedown will apply sanitisation to additional scripting vectors (such as scripting link destinations) that are introduced by the markdown syntax itself. + +To tell Parsedown that it is processing untrusted user-input, use the following: + +```php +$Parsedown->setSafeMode(true); +``` + +If instead, you wish to allow HTML within untrusted user-input, but still want output to be free from XSS it is recommended that you make use of a HTML sanitiser that allows HTML tags to be whitelisted, like [HTML Purifier](http://htmlpurifier.org/). + +In both cases you should strongly consider employing defence-in-depth measures, like [deploying a Content-Security-Policy](https://scotthelme.co.uk/content-security-policy-an-introduction/) (a browser security feature) so that your page is likely to be safe even if an attacker finds a vulnerability in one of the first lines of defence above. + +#### Security of Parsedown Extensions + +Safe mode does not necessarily yield safe results when using extensions to Parsedown. Extensions should be evaluated on their own to determine their specific safety against XSS. + +## Escaping HTML + +> **WARNING:** This method isn't safe from XSS! + +If you wish to escape HTML **in trusted input**, you can use the following: + +```php +$Parsedown->setMarkupEscaped(true); +``` + +Beware that this still allows users to insert unsafe scripting vectors, such as links like `[xss](javascript:alert%281%29)`. + +## Questions **How does Parsedown work?** -It tries to read Markdown like a human. First, it looks at the lines. It’s interested in how the lines start. This helps it recognise blocks. It knows, for example, that if a line start with a `-` then it perhaps belong to a list. Once it recognises the blocks, it continues to the content. As it reads, it watches out for special characters. This helps it recognise inline elements (or inlines). +It tries to read Markdown like a human. First, it looks at the lines. It’s interested in how the lines start. This helps it recognise blocks. It knows, for example, that if a line starts with a `-` then perhaps it belongs to a list. Once it recognises the blocks, it continues to the content. As it reads, it watches out for special characters. This helps it recognise inline elements (or inlines). We call this approach "line based". We believe that Parsedown is the first Markdown parser to use it. Since the release of Parsedown, other developers have used the same approach to develop other Markdown parsers in PHP and in other languages. @@ -41,8 +92,12 @@ It passes most of the CommonMark tests. Most of the tests that don't pass deal w **Who uses it?** -[phpDocumentor](http://www.phpdoc.org/), [October CMS](http://octobercms.com/), [Bolt CMS](http://bolt.cm/), [Kirby CMS](http://getkirby.com/), [Grav CMS](http://getgrav.org/), [Statamic CMS](http://www.statamic.com/), [RaspberryPi.org](http://www.raspberrypi.org/) and [more](https://www.versioneye.com/php/erusev:parsedown/references). +[Laravel Framework](https://laravel.com/), [Bolt CMS](http://bolt.cm/), [Grav CMS](http://getgrav.org/), [Herbie CMS](http://www.getherbie.org/), [Kirby CMS](http://getkirby.com/), [October CMS](http://octobercms.com/), [Pico CMS](http://picocms.org), [Statamic CMS](http://www.statamic.com/), [phpDocumentor](http://www.phpdoc.org/), [RaspberryPi.org](http://www.raspberrypi.org/), [Symfony Demo](https://github.com/symfony/demo) and [more](https://packagist.org/packages/erusev/parsedown/dependents). **How can I help?** -Use it, star it, share it and if you feel generous, [donate some money](https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=528P3NZQMP8N2). +Use it, star it, share it and if you feel generous, [donate](https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=528P3NZQMP8N2). + +**What else should I know?** + +I also make [Nota](https://nota.md/) — a writing app designed for Markdown files :)