From 1a47e74be1c31dca2781a77c9a93c6adf7312fd9 Mon Sep 17 00:00:00 2001 From: Aidan Woods Date: Sun, 18 Mar 2018 22:33:26 +0000 Subject: [PATCH 1/8] Quotes are permitted in escaped body --- test/ParsedownTest.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/ParsedownTest.php b/test/ParsedownTest.php index 2f58418..01b4e25 100644 --- a/test/ParsedownTest.php +++ b/test/ParsedownTest.php @@ -159,12 +159,12 @@ MARKDOWN_WITH_MARKUP;

<div>content</div>

sparse:

<div> -<div class=&quot;inner&quot;> +<div class="inner"> content </div> </div>

paragraph

-

<style type=&quot;text/css&quot;> +

<style type="text/css"> p { color: red; } From 65d7bc5013d4f39d3fb8b4dfbe87f9627ced2d74 Mon Sep 17 00:00:00 2001 From: Aidan Woods Date: Sun, 18 Mar 2018 22:36:30 +0000 Subject: [PATCH 2/8] Special casing for elements with no name --- Parsedown.php | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/Parsedown.php b/Parsedown.php index d7337a5..715deee 100644 --- a/Parsedown.php +++ b/Parsedown.php @@ -1489,12 +1489,18 @@ class Parsedown { $markup = ''; + $autoBreak = true; + foreach ($Elements as $Element) { - $markup .= "\n" . $this->element($Element); + // (autobreak === false) covers both sides of an element + $autoBreak = !$autoBreak ? $autoBreak : isset($Element['name']); + + $markup .= ($autoBreak ? "\n" : '') . $this->element($Element); + $autoBreak = isset($Element['name']); } - $markup .= "\n"; + $markup .= $autoBreak ? "\n" : ''; return $markup; } @@ -1539,6 +1545,12 @@ class Parsedown 'img' => 'src', ); + if ( ! isset($Element['name'])) + { + unset($Element['attributes']); + return $Element; + } + if (isset($safeUrlNameToAtt[$Element['name']])) { $Element = $this->filterUnsafeUrlInAttribute($Element, $safeUrlNameToAtt[$Element['name']]); From adcba805022d380e99636b66ae882f40eb9baaef Mon Sep 17 00:00:00 2001 From: Aidan Woods Date: Sun, 18 Mar 2018 22:37:40 +0000 Subject: [PATCH 3/8] Implement unmarked text via AST --- Parsedown.php | 80 ++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 69 insertions(+), 11 deletions(-) diff --git a/Parsedown.php b/Parsedown.php index 715deee..79e3ba6 100644 --- a/Parsedown.php +++ b/Parsedown.php @@ -1070,6 +1070,41 @@ class Parsedown # ~ # + protected function inlineText($text) + { + $Inline = array( + 'extent' => strlen($text), + 'element' => array( + 'handler' => 'elements', + ), + ); + + if ($this->breaksEnabled) + { + $Inline['element']['text'] = self::pregReplaceElements( + '/[ ]*\n/', + array( + array('name' => 'br'), + array('text' => "\n"), + ), + $text + ); + } 
+ else + { + $Inline['element']['text'] = self::pregReplaceElements( + '/(?:[ ][ ]+|[ ]*\\\\)\n/', + array( + array('name' => 'br'), + array('text' => "\n"), + ), + $text + ); + } + + return $Inline; + } + protected function inlineCode($Excerpt) { $marker = $Excerpt['text'][0]; @@ -1391,17 +1426,7 @@ class Parsedown protected function unmarkedText($text) { - if ($this->breaksEnabled) - { - $text = preg_replace('/[ ]*\n/', "
<br />\n", $text); - } - else - { - $text = preg_replace('/(?:[ ][ ]+|[ ]*\\\\)\n/', "<br />
\n", $text); - $text = str_replace(" \n", "\n", $text); - } - - return $text; + return $this->element($this->inlineText($text)['element']); } # @@ -1526,6 +1551,39 @@ class Parsedown return $markup; } + # + # AST Convenience + # + + /** + * Replace occurrences $regexp with $Elements in $text. Return an array of + * elements representing the replacement. + */ + protected static function pregReplaceElements($regexp, $Elements, $text) + { + $newElements = array(); + + while (preg_match($regexp, $text, $matches, PREG_OFFSET_CAPTURE)) + { + $offset = $matches[0][1]; + $before = substr($text, 0, $offset); + $after = substr($text, $offset + strlen($matches[0][0])); + + $newElements[] = array('text' => $before); + + foreach ($Elements as $Element) + { + $newElements[] = $Element; + } + + $text = $after; + } + + $newElements[] = array('text' => $text); + + return $newElements; + } + # # Deprecated Methods # From 011465bca6a830400e357c919441c7fd03670a0e Mon Sep 17 00:00:00 2001 From: Aidan Woods Date: Sun, 18 Mar 2018 22:44:07 +0000 Subject: [PATCH 4/8] Use rawHtml to provide conditional escaping for specialChars --- Parsedown.php | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/Parsedown.php b/Parsedown.php index 79e3ba6..ec0f0a9 100644 --- a/Parsedown.php +++ b/Parsedown.php @@ -964,13 +964,11 @@ class Parsedown # protected $InlineTypes = array( - '"' => array('SpecialCharacter'), '!' 
=> array('Image'), '&' => array('SpecialCharacter'), '*' => array('Emphasis'), ':' => array('Url'), - '<' => array('UrlTag', 'EmailTag', 'Markup', 'SpecialCharacter'), - '>' => array('SpecialCharacter'), + '<' => array('UrlTag', 'EmailTag', 'Markup'), '[' => array('Link'), '_' => array('Emphasis'), '`' => array('Code'), @@ -980,7 +978,7 @@ class Parsedown # ~ - protected $inlineMarkerList = '!"*_&[:<>`~\\'; + protected $inlineMarkerList = '!*_&[:<`~\\'; # # ~ @@ -1337,23 +1335,15 @@ class Parsedown protected function inlineSpecialCharacter($Excerpt) { - if ($Excerpt['text'][0] === '&' and ! preg_match('/^&#?\w+;/', $Excerpt['text'])) + if (preg_match('/^&(#?+[0-9a-zA-Z]++);/', $Excerpt['text'], $matches)) { return array( - 'markup' => '&', - 'extent' => 1, + 'element' => array('rawHtml' => '&'.$matches[1].';'), + 'extent' => strlen($matches[0]), ); } - $SpecialCharacter = array('>' => 'gt', '<' => 'lt', '"' => 'quot'); - - if (isset($SpecialCharacter[$Excerpt['text'][0]])) - { - return array( - 'markup' => '&'.$SpecialCharacter[$Excerpt['text'][0]].';', - 'extent' => 1, - ); - } + return; } protected function inlineStrikethrough($Excerpt) From 0205a4cbe640f0f5052974500d93c7b0404b7a15 Mon Sep 17 00:00:00 2001 From: Aidan Woods Date: Sun, 18 Mar 2018 22:46:08 +0000 Subject: [PATCH 5/8] Use rawHtml to provide conditional escaping on special chars --- Parsedown.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Parsedown.php b/Parsedown.php index ec0f0a9..546beae 100644 --- a/Parsedown.php +++ b/Parsedown.php @@ -1189,7 +1189,7 @@ class Parsedown if (isset($Excerpt['text'][1]) and in_array($Excerpt['text'][1], $this->specialCharacters)) { return array( - 'markup' => $Excerpt['text'][1], + 'element' => array('rawHtml' => $Excerpt['text'][1]), 'extent' => 2, ); } From 8c14c5c239d700c70832e84f744c0e469257acbc Mon Sep 17 00:00:00 2001 From: Aidan Woods Date: Sun, 18 Mar 2018 22:46:55 +0000 Subject: [PATCH 6/8] Use rawHtml to provide conditional escaping 
for markup --- Parsedown.php | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Parsedown.php b/Parsedown.php index 546beae..e261c2f 100644 --- a/Parsedown.php +++ b/Parsedown.php @@ -386,7 +386,7 @@ class Parsedown if (isset($Line['text'][3]) and $Line['text'][3] === '-' and $Line['text'][2] === '-' and $Line['text'][1] === '!') { $Block = array( - 'markup' => $Line['body'], + 'element' => array('rawHtml' => $Line['body']), ); if (preg_match('/-->$/', $Line['text'])) @@ -405,7 +405,7 @@ class Parsedown return; } - $Block['markup'] .= "\n" . $Line['body']; + $Block['element']['rawHtml'] .= "\n" . $Line['body']; if (preg_match('/-->$/', $Line['text'])) { @@ -734,7 +734,7 @@ class Parsedown $Block = array( 'name' => $matches[1], - 'markup' => $Line['text'], + 'element' => array('rawHtml' => $Line['text']), ); return $Block; @@ -748,7 +748,7 @@ class Parsedown return; } - $Block['markup'] .= "\n".$Line['body']; + $Block['element']['rawHtml'] .= "\n".$Line['body']; return $Block; } @@ -1311,7 +1311,7 @@ class Parsedown if ($Excerpt['text'][1] === '/' and preg_match('/^<\/\w[\w-]*[ ]*>/s', $Excerpt['text'], $matches)) { return array( - 'markup' => $matches[0], + 'element' => array('rawHtml' => $matches[0]), 'extent' => strlen($matches[0]), ); } @@ -1319,7 +1319,7 @@ class Parsedown if ($Excerpt['text'][1] === '!' 
and preg_match('/^<!---?[^>-](?:-?[^-])*-->/s', $Excerpt['text'], $matches)) { return array( - 'markup' => $matches[0], + 'element' => array('rawHtml' => $matches[0]), 'extent' => strlen($matches[0]), ); } @@ -1327,7 +1327,7 @@ class Parsedown if ($Excerpt['text'][1] !== ' ' and preg_match('/^<\w[\w-]*(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*\/?>/s', $Excerpt['text'], $matches)) { return array( - 'markup' => $matches[0], + 'element' => array('rawHtml' => $matches[0]), 'extent' => strlen($matches[0]), ); } From e59fbd736d98db6031824c9b3402f184d6d46c3d Mon Sep 17 00:00:00 2001 From: Aidan Woods Date: Sun, 18 Mar 2018 22:48:52 +0000 Subject: [PATCH 7/8] Remove 'markup' key exception for outputting via AST --- Parsedown.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Parsedown.php b/Parsedown.php index e261c2f..0e82ed5 100644 --- a/Parsedown.php +++ b/Parsedown.php @@ -294,7 +294,7 @@ class Parsedown } $markup .= "\n"; - $markup .= isset($Block['markup']) ? $Block['markup'] : $this->element($Block['element']); + $markup .= $this->element($Block['element']); } $markup .= "\n"; @@ -1042,7 +1042,7 @@ class Parsedown $markup .= $this->unmarkedText($unmarkedText); # compile the inline - $markup .= isset($Inline['markup']) ? 
$Inline['markup'] : $this->element($Inline['element']); + $markup .= $this->element($Inline['element']); # remove the examined text $text = substr($text, $Inline['position'] + $Inline['extent']); From 1fa6b038af2eb7cc34f7989cab39c5c62ba201e1 Mon Sep 17 00:00:00 2001 From: Aidan Woods Date: Sun, 18 Mar 2018 23:06:26 +0000 Subject: [PATCH 8/8] PHP 5.3 compat --- Parsedown.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Parsedown.php b/Parsedown.php index 0e82ed5..e015be8 100644 --- a/Parsedown.php +++ b/Parsedown.php @@ -1416,7 +1416,8 @@ class Parsedown protected function unmarkedText($text) { - return $this->element($this->inlineText($text)['element']); + $Inline = $this->inlineText($text); + return $this->element($Inline['element']); } #