1
0
mirror of https://github.com/erusev/parsedown.git synced 2025-09-03 11:52:36 +02:00

Merge branch 'master' into patch-4

This commit is contained in:
Aidan Woods
2018-03-27 11:13:06 +01:00
committed by GitHub
48 changed files with 754 additions and 213 deletions

View File

@@ -17,7 +17,7 @@ class Parsedown
{
# ~
const version = '1.6.0';
const version = '1.7.1';
# ~
@@ -75,6 +75,32 @@ class Parsedown
protected $urlsLinked = true;
#
# Turns safe mode on or off and returns the parser itself so that setter
# calls can be chained. The argument is coerced to a strict boolean before
# it is stored; the stored flag is what blockComment(), blockMarkup(),
# inlineMarkup() and element() test to decide whether to sanitise output.
#
function setSafeMode($safeMode)
{
    $enabled = (bool) $safeMode;

    $this->safeMode = $enabled;

    return $this;
}
# Safe-mode flag. It has no default value here; setSafeMode() stores a strict
# boolean in it, and the rest of the class only tests it for truthiness.
protected $safeMode;
# URL prefixes that filterUnsafeUrlInAttribute() accepts unchanged. The
# comparison is a case-insensitive "starts with" check (striAtStart); a URL
# that matches none of these prefixes has every ':' rewritten to '%3A'.
# Note that only three base64 image data: URI types are listed.
protected $safeLinksWhitelist = array(
'http://',
'https://',
'ftp://',
'ftps://',
'mailto:',
'data:image/png;base64,',
'data:image/gif;base64,',
'data:image/jpeg;base64,',
'irc:',
'ircs:',
'git:',
'ssh:',
'news:',
'steam:',
);
#
# Lines
#
@@ -342,8 +368,6 @@ class Parsedown
{
$text = $Block['element']['text']['text'];
$text = htmlspecialchars($text, ENT_NOQUOTES, 'UTF-8');
$Block['element']['text']['text'] = $text;
return $Block;
@@ -354,7 +378,7 @@ class Parsedown
protected function blockComment($Line)
{
if ($this->markupEscaped)
if ($this->markupEscaped or $this->safeMode)
{
return;
}
@@ -396,7 +420,7 @@ class Parsedown
protected function blockFencedCode($Line)
{
if (preg_match('/^['.$Line['text'][0].']{3,}[ ]*([\w-]+)?[ ]*$/', $Line['text'], $matches))
if (preg_match('/^['.$Line['text'][0].']{3,}[ ]*([^`]+)?[ ]*$/', $Line['text'], $matches))
{
$Element = array(
'name' => 'code',
@@ -448,7 +472,7 @@ class Parsedown
return $Block;
}
$Block['element']['text']['text'] .= "\n".$Line['body'];;
$Block['element']['text']['text'] .= "\n".$Line['body'];
return $Block;
}
@@ -457,8 +481,6 @@ class Parsedown
{
$text = $Block['element']['text']['text'];
$text = htmlspecialchars($text, ENT_NOQUOTES, 'UTF-8');
$Block['element']['text']['text'] = $text;
return $Block;
@@ -533,7 +555,7 @@ class Parsedown
),
);
if($name === 'ol')
if($name === 'ol')
{
$listStart = ltrim(strstr($matches[1], $Block['data']['markerType'], true), '0') ?: '0';
@@ -584,6 +606,8 @@ class Parsedown
{
$Block['li']['text'] []= '';
$Block['loose'] = true;
unset($Block['interrupted']);
}
@@ -641,6 +665,22 @@ class Parsedown
}
}
#
# Finalises a list block. If the block carries the 'loose' key, every list
# item whose last text line is not already an empty string gets an empty
# string appended, so that all items of a loose list end the same way.
# Blocks without the 'loose' key are returned untouched.
#
protected function blockListComplete(array $Block)
{
    if (isset($Block['loose']))
    {
        foreach ($Block['element']['text'] as &$li)
        {
            # end() returns false for an empty array, which also counts as
            # "does not end in an empty line"
            if (end($li['text']) !== '')
            {
                $li['text'] []= '';
            }
        }

        # break the by-reference binding left behind by the loop; otherwise
        # the last list item stays aliased to $li while $Block is returned
        unset($li);
    }

    return $Block;
}
#
# Quote
@@ -711,8 +751,10 @@ class Parsedown
return;
}
if (chop($Line['text'], $Line['text'][0]) === '')
{
if (
chop(chop($Line['text'], ' '), $Line['text'][0]) === ''
and $Line['indent'] < 4
) {
$Block['element']['name'] = $Line['text'][0] === '=' ? 'h1' : 'h2';
return $Block;
@@ -724,12 +766,12 @@ class Parsedown
protected function blockMarkup($Line)
{
if ($this->markupEscaped)
if ($this->markupEscaped or $this->safeMode)
{
return;
}
if (preg_match('/^<(\w*)(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*(\/)?>/', $Line['text'], $matches))
if (preg_match('/^<[\/]?+(\w*)(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*(\/)?>/', $Line['text'], $matches))
{
$element = strtolower($matches[1]);
@@ -740,71 +782,20 @@ class Parsedown
$Block = array(
'name' => $matches[1],
'depth' => 0,
'markup' => $Line['text'],
);
$length = strlen($matches[0]);
$remainder = substr($Line['text'], $length);
if (trim($remainder) === '')
{
if (isset($matches[2]) or in_array($matches[1], $this->voidElements))
{
$Block['closed'] = true;
$Block['void'] = true;
}
}
else
{
if (isset($matches[2]) or in_array($matches[1], $this->voidElements))
{
return;
}
if (preg_match('/<\/'.$matches[1].'>[ ]*$/i', $remainder))
{
$Block['closed'] = true;
}
}
return $Block;
}
}
protected function blockMarkupContinue($Line, array $Block)
{
if (isset($Block['closed']))
if (isset($Block['closed']) or isset($Block['interrupted']))
{
return;
}
if (preg_match('/^<'.$Block['name'].'(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*>/i', $Line['text'])) # open
{
$Block['depth'] ++;
}
if (preg_match('/(.*?)<\/'.$Block['name'].'>[ ]*$/i', $Line['text'], $matches)) # close
{
if ($Block['depth'] > 0)
{
$Block['depth'] --;
}
else
{
$Block['closed'] = true;
}
}
if (isset($Block['interrupted']))
{
$Block['markup'] .= "\n";
unset($Block['interrupted']);
}
$Block['markup'] .= "\n".$Line['body'];
return $Block;
@@ -1043,7 +1034,7 @@ class Parsedown
# ~
#
public function line($text)
public function line($text, $nonNestables=array())
{
$markup = '';
@@ -1059,6 +1050,13 @@ class Parsedown
foreach ($this->InlineTypes[$marker] as $inlineType)
{
# check to see if the current inline type is nestable in the current context
if ( ! empty($nonNestables) and in_array($inlineType, $nonNestables))
{
continue;
}
$Inline = $this->{'inline'.$inlineType}($Excerpt);
if ( ! isset($Inline))
@@ -1080,6 +1078,13 @@ class Parsedown
$Inline['position'] = $markerPosition;
}
# cause the new element to 'inherit' our non nestables
foreach ($nonNestables as $non_nestable)
{
$Inline['element']['nonNestables'][] = $non_nestable;
}
# the text that comes before the inline
$unmarkedText = substr($text, 0, $Inline['position']);
@@ -1120,7 +1125,6 @@ class Parsedown
if (preg_match('/^('.$marker.'+)[ ]*(.+?)[ ]*(?<!'.$marker.')\1(?!'.$marker.')/s', $Excerpt['text'], $matches))
{
$text = $matches[2];
$text = htmlspecialchars($text, ENT_NOQUOTES, 'UTF-8');
$text = preg_replace("/[ ]*\n/", ' ', $text);
return array(
@@ -1135,8 +1139,14 @@ class Parsedown
protected function inlineEmailTag($Excerpt)
{
if (strpos($Excerpt['text'], '>') !== false and preg_match('/^<((mailto:)?\S+?@\S+?)>/i', $Excerpt['text'], $matches))
{
$hostnameLabel = '[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?';
$commonMarkEmail = '[a-zA-Z0-9.!#$%&\'*+\/=?^_`{|}~-]++@'
. $hostnameLabel . '(?:\.' . $hostnameLabel . ')*';
if (strpos($Excerpt['text'], '>') !== false
and preg_match("/^<((mailto:)?$commonMarkEmail)>/i", $Excerpt['text'], $matches)
){
$url = $matches[1];
if ( ! isset($matches[2]))
@@ -1239,6 +1249,7 @@ class Parsedown
$Element = array(
'name' => 'a',
'handler' => 'line',
'nonNestables' => array('Url', 'Link'),
'text' => null,
'attributes' => array(
'href' => null,
@@ -1250,7 +1261,7 @@ class Parsedown
$remainder = $Excerpt['text'];
if (preg_match('/\[((?:[^][]|(?R))*)\]/', $remainder, $matches))
if (preg_match('/\[((?:[^][]++|(?R))*+)\]/', $remainder, $matches))
{
$Element['text'] = $matches[1];
@@ -1263,7 +1274,7 @@ class Parsedown
return;
}
if (preg_match('/^[(]((?:[^ ()]|[(][^ )]+[)])+)(?:[ ]+("[^"]*"|\'[^\']*\'))?[)]/', $remainder, $matches))
if (preg_match('/^[(]\s*+((?:[^ ()]++|[(][^ )]+[)])++)(?:[ ]+("[^"]*"|\'[^\']*\'))?\s*[)]/', $remainder, $matches))
{
$Element['attributes']['href'] = $matches[1];
@@ -1299,8 +1310,6 @@ class Parsedown
$Element['attributes']['title'] = $Definition['title'];
}
$Element['attributes']['href'] = str_replace(array('&', '<'), array('&amp;', '&lt;'), $Element['attributes']['href']);
return array(
'extent' => $extent,
'element' => $Element,
@@ -1309,12 +1318,12 @@ class Parsedown
protected function inlineMarkup($Excerpt)
{
if ($this->markupEscaped or strpos($Excerpt['text'], '>') === false)
if ($this->markupEscaped or $this->safeMode or strpos($Excerpt['text'], '>') === false)
{
return;
}
if ($Excerpt['text'][1] === '/' and preg_match('/^<\/\w*[ ]*>/s', $Excerpt['text'], $matches))
if ($Excerpt['text'][1] === '/' and preg_match('/^<\/\w[\w-]*[ ]*>/s', $Excerpt['text'], $matches))
{
return array(
'markup' => $matches[0],
@@ -1330,7 +1339,7 @@ class Parsedown
);
}
if ($Excerpt['text'][1] !== ' ' and preg_match('/^<\w*(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*\/?>/s', $Excerpt['text'], $matches))
if ($Excerpt['text'][1] !== ' ' and preg_match('/^<\w[\w-]*(?:[ ]*'.$this->regexHtmlAttribute.')*[ ]*\/?>/s', $Excerpt['text'], $matches))
{
return array(
'markup' => $matches[0],
@@ -1389,14 +1398,16 @@ class Parsedown
if (preg_match('/\bhttps?:[\/]{2}[^\s<]+\b\/*/ui', $Excerpt['context'], $matches, PREG_OFFSET_CAPTURE))
{
$url = $matches[0][0];
$Inline = array(
'extent' => strlen($matches[0][0]),
'position' => $matches[0][1],
'element' => array(
'name' => 'a',
'text' => $matches[0][0],
'text' => $url,
'attributes' => array(
'href' => $matches[0][0],
'href' => $url,
),
),
);
@@ -1409,7 +1420,7 @@ class Parsedown
{
if (strpos($Excerpt['text'], '>') !== false and preg_match('/^<(\w+:\/{2}[^ >]+)>/i', $Excerpt['text'], $matches))
{
$url = str_replace(array('&', '<'), array('&amp;', '&lt;'), $matches[1]);
$url = $matches[1];
return array(
'extent' => strlen($matches[0]),
@@ -1447,37 +1458,74 @@ class Parsedown
protected function element(array $Element)
{
$markup = '<'.$Element['name'];
if (isset($Element['attributes']))
if ($this->safeMode)
{
foreach ($Element['attributes'] as $name => $value)
{
if ($value === null)
{
continue;
}
$Element = $this->sanitiseElement($Element);
}
$markup .= ' '.$name.'="'.$value.'"';
$hasName = isset($Element['name']);
$markup = '';
if ($hasName)
{
$markup .= '<'.$Element['name'];
if (isset($Element['attributes']))
{
foreach ($Element['attributes'] as $name => $value)
{
if ($value === null)
{
continue;
}
$markup .= ' '.$name.'="'.self::escape($value).'"';
}
}
}
$permitRawHtml = false;
if (isset($Element['text']))
{
$markup .= '>';
$text = $Element['text'];
}
// very strongly consider an alternative if you're writing an
// extension
elseif (isset($Element['rawHtml']))
{
$text = $Element['rawHtml'];
$allowRawHtmlInSafeMode = isset($Element['allowRawHtmlInSafeMode']) && $Element['allowRawHtmlInSafeMode'];
$permitRawHtml = !$this->safeMode || $allowRawHtmlInSafeMode;
}
if (isset($text))
{
$markup .= $hasName ? '>' : '';
if (!isset($Element['nonNestables']))
{
$Element['nonNestables'] = array();
}
if (isset($Element['handler']))
{
$markup .= $this->{$Element['handler']}($Element['text']);
$markup .= $this->{$Element['handler']}($text, $Element['nonNestables']);
}
elseif (!$permitRawHtml)
{
$markup .= self::escape($text, true);
}
else
{
$markup .= $Element['text'];
$markup .= $text;
}
$markup .= '</'.$Element['name'].'>';
$markup .= $hasName ? '</'.$Element['name'].'>' : '';
}
else
elseif ($hasName)
{
$markup .= ' />';
}
@@ -1531,10 +1579,77 @@ class Parsedown
return $markup;
}
#
# Returns a copy of $Element that is safe to render in safe mode:
#  - for 'a' and 'img' elements the URL-carrying attribute ('href' / 'src')
#    is passed through filterUnsafeUrlInAttribute();
#  - attributes whose name is not a plain identifier (letters, digits, '-'
#    and '_', starting with a letter or digit) are removed;
#  - attributes whose name starts with "on" (any letter case) are removed.
#
protected function sanitiseElement(array $Element)
{
    static $goodAttribute = '/^[a-zA-Z0-9][a-zA-Z0-9-_]*+$/';
    static $safeUrlNameToAtt  = array(
        'a'   => 'href',
        'img' => 'src',
    );

    # element() also accepts elements without a 'name', so the name must be
    # checked for existence before it is used as a lookup key
    if (isset($Element['name'], $safeUrlNameToAtt[$Element['name']]))
    {
        $Element = $this->filterUnsafeUrlInAttribute($Element, $safeUrlNameToAtt[$Element['name']]);
    }

    if ( ! empty($Element['attributes']))
    {
        foreach ($Element['attributes'] as $att => $val)
        {
            # filter out badly parsed attribute
            if ( ! preg_match($goodAttribute, $att))
            {
                unset($Element['attributes'][$att]);
            }
            # dump onevent attribute
            elseif (self::striAtStart($att, 'on'))
            {
                unset($Element['attributes'][$att]);
            }
        }
    }

    return $Element;
}
#
# Neutralises the URL held in $Element['attributes'][$attribute] unless it
# starts (case-insensitively) with one of the prefixes listed in
# $this->safeLinksWhitelist. A URL that matches no prefix has every ':'
# replaced by '%3A'. The (possibly modified) element is returned.
#
protected function filterUnsafeUrlInAttribute(array $Element, $attribute)
{
    # nothing to filter when the attribute is missing or null; element()
    # skips null attributes, so leave the element exactly as it was instead
    # of reading an undefined index and turning null into an empty string
    if ( ! isset($Element['attributes'][$attribute]))
    {
        return $Element;
    }

    $url = $Element['attributes'][$attribute];

    foreach ($this->safeLinksWhitelist as $scheme)
    {
        if (self::striAtStart($url, $scheme))
        {
            return $Element;
        }
    }

    $Element['attributes'][$attribute] = str_replace(':', '%3A', $url);

    return $Element;
}
#
# Static Methods
#
#
# HTML-escapes $text, treating it as UTF-8. By default both double and
# single quotes are converted (ENT_QUOTES), which is what attribute values
# need; pass a truthy $allowQuotes to leave quotes as they are
# (ENT_NOQUOTES), as element() does for text content.
#
protected static function escape($text, $allowQuotes = false)
{
    if ($allowQuotes)
    {
        $quoteStyle = ENT_NOQUOTES;
    }
    else
    {
        $quoteStyle = ENT_QUOTES;
    }

    return htmlspecialchars($text, $quoteStyle, 'UTF-8');
}
#
# Case-insensitive "starts with": returns true when $string begins with
# $needle, comparing the lower-cased prefix of $string against the
# lower-cased $needle. A needle longer than the string never matches; an
# empty needle always does.
#
protected static function striAtStart($string, $needle)
{
    $needleLength = strlen($needle);

    # a string shorter than the needle cannot start with it
    if (strlen($string) < $needleLength)
    {
        return false;
    }

    $prefix = substr($string, 0, $needleLength);

    return strtolower($prefix) === strtolower($needle);
}
static function instance($name = 'default')
{
if (isset(self::$instances[$name]))
@@ -1585,10 +1700,10 @@ class Parsedown
'b', 'em', 'big', 'cite', 'small', 'spacer', 'listing',
'i', 'rp', 'del', 'code', 'strike', 'marquee',
'q', 'rt', 'ins', 'font', 'strong',
's', 'tt', 'sub', 'mark',
'u', 'xm', 'sup', 'nobr',
'var', 'ruby',
'wbr', 'span',
'time',
's', 'tt', 'kbd', 'mark',
'u', 'xm', 'sub', 'nobr',
'sup', 'ruby',
'var', 'span',
'wbr', 'time',
);
}