update to CommonMark support

This commit is contained in:
祁宁 2014-09-26 21:40:55 +08:00
parent 146b289c42
commit 3e5a01637b
21 changed files with 4858 additions and 1890 deletions

View File

@ -1,6 +1,7 @@
<?php if(!defined('__TYPECHO_ADMIN__')) exit; ?>
<?php $content = !empty($post) ? $post : $page; if ($options->markdown): ?>
<script src="<?php $options->adminStaticUrl('js', 'pagedown.js?v=' . $suffixVersion); ?>"></script>
<script src="<?php $options->adminStaticUrl('js', 'stmd.js?v=' . $suffixVersion); ?>"></script>
<script src="<?php $options->adminStaticUrl('js', 'diff.js?v=' . $suffixVersion); ?>"></script>
<script>
$(document).ready(function () {
@ -56,8 +57,7 @@ $(document).ready(function () {
help: '<?php _e('Markdown语法帮助'); ?>'
};
var converter = new Typecho.Markdown('<?php $security->index('/action/contents-'
. (!empty($post) ? 'post' : 'page') . '-edit?do=preview'); ?>'),
var converter = new Typecho.Markdown,
editor = new Markdown.Editor(converter, '', options),
diffMatch = new diff_match_patch(), last = '', preview = $('#wmd-preview'),
mark = '@mark' + Math.ceil(Math.random() * 100000000) + '@',

View File

@ -257,8 +257,7 @@ else
panels = new PanelCollection(idPostfix);
var commandManager = new CommandManager(hooks, getString);
var previewManager = new PreviewManager(markdownConverter, panels,
function () { hooks.onPreviewRefresh(); });
var previewManager = new PreviewManager(markdownConverter, panels, function () { hooks.onPreviewRefresh(); });
var undoManager, uiManager;
if (!/\?noundo/.test(doc.location.href)) {
@ -368,7 +367,7 @@ else
var regexText;
var replacementText;
// chrome bug ... documented at: http://meta.stackoverflow.com/questions/63307/blockquote-glitch-in-editor-in-chrome-6-and-7/65985#65985
// chrome bug ... documented at: http://meta.stackexchange.com/questions/63307/blockquote-glitch-in-editor-in-chrome-6-and-7/65985#65985
if (navigator.userAgent.match(/Chrome/)) {
"X".match(/()./);
}
@ -1013,14 +1012,14 @@ else
var prevTime = new Date().getTime();
text = converter.makeHtml(text, pushPreviewHtml);
text = converter.makeHtml(text);
// Calculate the processing time of the HTML creation.
// It's used as the delay time in the event listener.
var currTime = new Date().getTime();
elapsedTime = currTime - prevTime;
// pushPreviewHtml(text);
pushPreviewHtml(text);
};
// setTimeout is already used. Used as an event listener.
@ -1159,9 +1158,9 @@ else
var background = doc.createElement("div"),
style = background.style;
background.className = "wmd-prompt-background";
style.position = "absolute";
style.top = "0";
@ -1993,14 +1992,39 @@ else
// sure the URL and the optinal title are "nice".
function properlyEncoded(linkdef) {
return linkdef.replace(/^\s*(.*?)(?:\s+"(.+)")?\s*$/, function (wholematch, link, title) {
link = link.replace(/\?.*$/, function (querypart) {
return querypart.replace(/\+/g, " "); // in the query string, a plus and a space are identical
});
link = decodeURIComponent(link); // unencode first, to prevent double encoding
link = encodeURI(link).replace(/'/g, '%27').replace(/\(/g, '%28').replace(/\)/g, '%29');
link = link.replace(/\?.*$/, function (querypart) {
return querypart.replace(/\+/g, "%2b"); // since we replaced plus with spaces in the query part, all pluses that now appear where originally encoded
});
var inQueryString = false;
// The last alternative, `[^\w\d-./]`, is just a shortcut that lets us skip
// the most common characters in URLs. Replacing it with `.` would not change
// the result, because encodeURI returns those characters unchanged, but it
// would mean lots of unnecessary replacement calls
link = link.replace(/%(?:[\da-fA-F]{2})|\?|\+|[^\w\d-./]/g, function (match) {
// Valid percent encoding. Could just return it as is, but we follow RFC3986
// Section 2.1 which says "For consistency, URI producers and normalizers
// should use uppercase hexadecimal digits for all percent-encodings."
// Note that we also handle (illegal) stand-alone percent characters by
// replacing them with "%25"
if (match.length === 3 && match.charAt(0) == "%") {
return match.toUpperCase();
}
switch (match) {
case "?":
inQueryString = true;
return "?";
break;
// In the query string, a plus and a space are identical -- normalize.
// Not strictly necessary, but identical behavior to the previous version
// of this function.
case "+":
if (inQueryString)
return "%20";
break;
}
return encodeURI(match);
})
if (title) {
title = title.trim ? title.trim() : title.replace(/^\s*/, "").replace(/\s*$/, "");
title = title.replace(/"/g, "quot;").replace(/\(/g, "&#40;").replace(/\)/g, "&#41;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
@ -2023,7 +2047,7 @@ else
}
else {
// We're moving start and end tag back into the selection, since (as we're in the else block) we're not
// *removing* a link, but *adding* one, so whatever findTags() found is now back to being part of the
// link text. linkEnteredCallback takes care of escaping any brackets.
@ -2061,7 +2085,7 @@ else
// would mean a zero-width match at the start. Since zero-width matches advance the string position,
// the first bracket could then not act as the "not a backslash" for the second.
chunk.selection = (" " + chunk.selection).replace(/([^\\](?:\\\\)*)(?=[[\]])/g, "$1\\").substr(1);
var linkDef = " [999]: " + properlyEncoded(link);
var num = that.addLinkDef(chunk, linkDef);
@ -2071,18 +2095,10 @@ else
if (!chunk.selection) {
if (isImage) {
var imagename = that.getString("imagename");
if (!!imagename) {
imagename = imagename.replace(/_/g, '\\_');
}
chunk.selection = imagename || that.getString("imagedescription");
}
else {
var linkname = that.getString("linkname");
if (!!linkname) {
linkname = linkname.replace(/_/g, '\\_');
}
chunk.selection = linkname || that.getString("linkdescription");
}
}
@ -2114,7 +2130,7 @@ else
chunk.before = chunk.before.replace(/(\n|^)[ ]{0,3}([*+-]|\d+[.])[ \t]*\n$/, "\n\n");
chunk.before = chunk.before.replace(/(\n|^)[ ]{0,3}>[ \t]*\n$/, "\n\n");
chunk.before = chunk.before.replace(/(\n|^)[ \t]+\n$/, "\n\n");
// There's no selection, end the cursor wasn't at the end of the line:
// The user wants to split the current list item / code line / blockquote line
// (for the latter it doesn't really matter) in two. Temporarily select the
@ -2142,7 +2158,7 @@ else
commandMgr.doCode(chunk);
}
}
if (fakeSelection) {
chunk.after = chunk.selection + chunk.after;
chunk.selection = "";
@ -2192,7 +2208,7 @@ else
//
// Since this is essentially a backwards-moving regex, it's susceptible to
// catstrophic backtracking and can cause the browser to hang;
// see e.g. http://meta.stackoverflow.com/questions/9807.
// see e.g. http://meta.stackexchange.com/questions/9807.
//
// Hence we replaced this by a simple state machine that just goes through the
// lines and checks for a), b), and c).

1547
admin/js/stmd.js Executable file

File diff suppressed because it is too large Load Diff

View File

@ -170,42 +170,19 @@
(function () {
Typecho.Markdown = function (url) {
this.lastHtml = '';
this.lastText = '';
this.currentTime = new Date().getTime();
this.lock = false;
this.first = true;
this.cb = null;
Typecho.Markdown = function () {
this.writer = new stmd.HtmlRenderer();
this.reader = new stmd.DocParser();
this.hooks = new Markdown.HookCollection()
this.hooks.addNoop('postConversion');
var self = this;
setInterval(function () {
var currentTime = new Date().getTime();
if (!self.lock && self.cb && (self.first || currentTime - self.currentTime > 1000)) {
self.lock = true;
self.first = false;
$.post(url, {text : self.lastText}, function (html) {
html = self.hooks.postConversion(html);
self.cb(html);
self.cb = null;
self.lastHtml = html;
self.lock = false;
}, 'json');
}
}, 500);
};
Typecho.Markdown.prototype.makeHtml = function (text, pushPreviewHtml) {
this.lastText = text;
this.currentTime = new Date().getTime();
this.cb = pushPreviewHtml;
Typecho.Markdown.prototype.makeHtml = function (text) {
var doc = this.reader.parse(text),
html = this.writer.renderBlock(doc);
return this.lastHtml;
return this.hooks.postConversion(html);
};
})();

View File

@ -7,7 +7,7 @@ DIR=../
update:
@echo 'git update'
rm -Rf build/
git clone https://github.com/typecho/typecho-replica.git build
git clone https://github.com/typecho/typecho.git build
rm -Rf build/.git
rm -f build/.gitignore
rm -f build/.gitattributes
@ -33,6 +33,21 @@ package:
tar -cvvzf build.tar.gz build/
# Refresh the bundled PHP CommonMark library: drop the old copy, clone
# commonmark-php, run transfer.php over its src/ directory (arguments: source
# dir, new class prefix, original namespace), then move the rewritten sources
# to ../var/CommonMark and delete the temporary clone.
commonmark-php:
rm -rf ../var/CommonMark
git clone git@github.com:colinodell/commonmark-php.git commonmark
php transfer.php commonmark/src CommonMark ColinODell/CommonMark
mv commonmark/src ../var/CommonMark
rm -rf commonmark
# Refresh the bundled JavaScript CommonMark parser: remove the old
# ../admin/js/stmd.js, clone jgm/stmd, copy its js/stmd.js into ../admin/js/
# and delete the temporary clone.
commonmark-js:
rm -rf ../admin/js/stmd.js
git clone https://github.com/jgm/stmd.git
cp stmd/js/stmd.js ../admin/js/
rm -rf stmd
clear:
rm -Rf build/

107
tools/transfer.php Normal file
View File

@ -0,0 +1,107 @@
<?php
// Transfer namespace based php classes to "dashed" (underscore-prefixed) class names.
//
// Usage: php transfer.php <source-dir> <new-prefix> <original-namespace>
//   e.g. php transfer.php commonmark/src CommonMark ColinODell/CommonMark
//
// Every file below <source-dir> is rewritten IN PLACE: its `namespace` and `use`
// statements are removed and every class-name token is replaced by the matching
// <new-prefix>_Path_To_Class name.

if (!isset($argv) || count($argv) < 4) {
    fwrite(STDERR, "Usage: php transfer.php <source-dir> <new-prefix> <original-namespace>\n");
    exit(1);
}

$dir = $argv[1];
$ns = $argv[2];
$fake = $argv[3];

$files = new RecursiveIteratorIterator(new RecursiveDirectoryIterator($dir,
    FilesystemIterator::KEY_AS_PATHNAME
    | FilesystemIterator::CURRENT_AS_FILEINFO | FilesystemIterator::SKIP_DOTS));

// $map:   fully qualified original class name => new underscore class name
// $lists: every file path that will be rewritten
$map = [];
$lists = [];
$offset = strlen($dir);

foreach ($files as $info) {
    $path = $info->getPathname();
    $filename = $info->getFilename();
    $lists[] = $path;

    // Dot-files are still rewritten below, but never contribute a class mapping.
    if ($filename[0] == '.') {
        continue;
    }

    // Path relative to the source dir, without extension, is the class path.
    $path = ltrim(substr($path, $offset), '/\\');
    list($class) = explode('.', $path);

    $name = str_replace(['/', '\\'], '\\', $fake . '\\' . $class);
    $class = str_replace(['/', '\\'], '_', $ns . '_' . $class);
    $map[$name] = $class;
}

foreach ($lists as $file) {
    $source = file_get_contents($file);

    // $replace: short name as written in this file => new underscore class name
    // $current: fully qualified original name of the class declared in this file
    $replace = [];
    $current = '';

    // Strip the namespace declaration and remember which class this file declares.
    $source = preg_replace_callback("/\nnamespace\s*([a-z_\\\]+);/is", function ($matches) use ($map, $file, &$replace, &$current) {
        $matches[1] .= '\\' . pathinfo($file, PATHINFO_FILENAME);
        $parts = explode('\\', $matches[1]);
        $last = array_pop($parts);

        if (isset($map[$matches[1]])) {
            $replace[$last] = $map[$matches[1]];
            $current = $matches[1];
        }

        return '';
    }, $source);

    // Strip `use` statements and record how each imported class is referred to.
    $source = preg_replace_callback("/\nuse\s*([a-z_\\\]+)(?:\s+as\s+([a-z_\\\]+))?;/is", function ($matches) use ($map, &$replace) {
        $parts = explode('\\', $matches[1]);
        $last = array_pop($parts);

        // With `use Foo\Bar as Baz;` the file body refers to the class as `Baz`,
        // so the alias (when present) is the name that has to be replaced.
        $local = !empty($matches[2]) ? $matches[2] : $last;

        if (isset($map[$matches[1]])) {
            $replace[$local] = $map[$matches[1]];
        }

        return '';
    }, $source);

    // Classes at the same namespace depth as the current class can be used
    // without a `use` statement; register them unless already imported.
    foreach ($map as $key => $val) {
        if (count(explode('\\', $key)) == count(explode('\\', $current))) {
            $parts = explode('_', $val);
            $last = array_pop($parts);

            if (!isset($replace[$last])) {
                $replace[$last] = $val;
            }
        }
    }

    // Re-emit the file token by token, swapping class names and dropping
    // namespace separators together with the qualifier that precedes them.
    $tokens = token_get_all($source);
    $source = '';
    $last = false;

    foreach ($tokens as $key => $token) {
        if (!is_array($token)) {
            $source .= $token;
            $last = false;
            continue;
        }

        list ($name, $str) = $token;

        if (T_STRING == $name) {
            $str = isset($replace[$str]) ? $replace[$str] : $str;
        } else if (T_NS_SEPARATOR == $name) {
            if (T_STRING == $last) {
                // Remove the already-emitted qualifier segment before the separator.
                $source = substr($source, 0, - strlen($tokens[$key - 1][1]));
            }
            $str = '';
        }

        $last = $name;
        $source .= $str;
    }

    file_put_contents($file, $source);
}

View File

@ -0,0 +1,39 @@
<?php
/*
* This file is part of the commonmark-php package.
*
* (c) Colin O'Dell <colinodell@gmail.com>
*
* Original code based on stmd.js
* - (c) John MacFarlane
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
 * Facade that turns CommonMark-compatible Markdown into HTML.
 */
class CommonMark_CommonMarkConverter
{
    /**
     * Parses the Markdown text into a document AST and renders that AST.
     *
     * @param string $commonMark Markdown source text
     *
     * @return string The rendered HTML
     *
     * @api
     */
    public function convertToHtml($commonMark)
    {
        $parser = new CommonMark_DocParser();
        $htmlRenderer = new CommonMark_HtmlRenderer();

        return $htmlRenderer->render($parser->parse($commonMark));
    }
}

View File

@ -0,0 +1,595 @@
<?php
/*
* This file is part of the commonmark-php package.
*
* (c) Colin O'Dell <colinodell@gmail.com>
*
* Original code based on stmd.js
* - (c) John MacFarlane
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
* Parses Markdown into an AST
*/
class CommonMark_DocParser
{
const CODE_INDENT = 4;
/**
* @var BlockElement
*/
protected $tip;
/**
* @var BlockElement
*/
protected $doc;
/**
* @var InlineParser
*/
protected $inlineParser;
/**
* @var ReferenceMap
*/
protected $refMap;
/**
 * Expands every tab in a line to spaces, aligning to 4-column tab stops.
 *
 * @param string $string The line to expand
 *
 * @return string The line with tabs replaced by 1-4 spaces each
 */
protected static function detabLine($string)
{
    // Fast path: nothing to expand
    if (false === strpos($string, "\t")) {
        return $string;
    }

    $segments = explode("\t", $string);

    // The text before the first tab is copied verbatim; each later segment
    // is preceded by enough spaces to reach the next tab stop.
    $expanded = array_shift($segments);
    foreach ($segments as $segment) {
        $padding = 4 - (mb_strlen($expanded, 'UTF-8') % 4);
        $expanded .= str_repeat(' ', $padding) . $segment;
    }

    return $expanded;
}
/**
 * Break out of all containing lists, resetting the tip of the
 * document to the parent of the highest list, and finalizing
 * all the lists. (This is used to implement the "two blank lines
 * break out of all lists" feature.)
 *
 * @param CommonMark_Element_BlockElement $block      Block to start walking up from
 * @param int                             $lineNumber Line number passed on to finalize()
 */
protected function breakOutOfLists(CommonMark_Element_BlockElement $block, $lineNumber)
{
    // Find the outermost list among $block and its ancestors
    $b = $block;
    $lastList = null;
    do {
        if ($b->getType() === CommonMark_Element_BlockElement::TYPE_LIST) {
            $lastList = $b;
        }

        $b = $b->getParent();
    } while ($b);

    if ($lastList) {
        // Identity comparison: we are looking for that very instance. A loose
        // `!=` would compare the two blocks property by property, descending
        // into the parent/children object graph.
        while ($block !== $lastList) {
            $this->finalize($block, $lineNumber);
            $block = $block->getParent();
        }

        $this->finalize($lastList, $lineNumber);
        $this->tip = $lastList->getParent();
    }
}
/**
 * Appends the part of a line starting at $offset to the current tip block.
 *
 * @param string $ln     The full line
 * @param int    $offset Position in $ln where the text to add begins
 *
 * @throws RuntimeException if the tip block is already closed
 */
protected function addLine($ln, $offset)
{
    // substr() may yield false when there is nothing left; store an empty line then
    $remainder = substr($ln, $offset);
    $text = ($remainder === false) ? '' : $remainder;

    if ($this->tip->getIsOpen()) {
        $this->tip->getStrings()->add($text);

        return;
    }

    throw new RuntimeException(sprintf('Attempted to add line (%s) to closed container.', $ln));
}
/**
 * Creates a block of type $tag as a child of the tip and makes it the new tip.
 *
 * Tips that cannot contain a block of this type are finalized first.
 *
 * @param string $tag        Block type (see CommonMark_Element_BlockElement::TYPE_ constants)
 * @param int    $lineNumber Line on which the new block starts
 * @param int    $offset     Zero-based offset in the line where the block starts
 *
 * @return CommonMark_Element_BlockElement The newly created block
 */
protected function addChild($tag, $lineNumber, $offset)
{
    while (true) {
        if ($this->tip->canContain($tag)) {
            break;
        }

        $this->finalize($this->tip, $lineNumber);
    }

    $parent = $this->tip;

    // offset 0 = column 1
    $child = new CommonMark_Element_BlockElement($tag, $lineNumber, $offset + 1);
    $parent->getChildren()->add($child);
    $child->setParent($parent);

    $this->tip = $child;

    return $child;
}
/**
 * Tries to read a bullet or ordered list marker at $offset in the line.
 *
 * @param string $ln     The full line
 * @param int    $offset Position at which the marker is expected
 *
 * @return array|null List data (type, delimiter, bullet_char, padding and,
 *                    for ordered lists, start) or null if there is no marker
 */
protected function parseListMarker($ln, $offset)
{
    $rest = substr($ln, $offset);

    // Text matching the horizontal-rule regex is never treated as a list marker
    $hRuleRegex = CommonMark_Util_RegexHelper::getInstance()->getHRuleRegex();
    if (preg_match($hRuleRegex, $rest)) {
        return null;
    }

    $matches = CommonMark_Util_RegexHelper::matchAll('/^[*+-]( +|$)/', $rest);
    if ($matches) {
        $spacesAfterMarker = strlen($matches[1]);
        $data = array(
            'type'        => CommonMark_Element_BlockElement::LIST_TYPE_UNORDERED,
            'delimiter'   => null,
            'bullet_char' => $matches[0][0],
        );
    } else {
        $matches = CommonMark_Util_RegexHelper::matchAll('/^(\d+)([.)])( +|$)/', $rest);
        if (!$matches) {
            return null;
        }

        $spacesAfterMarker = strlen($matches[3]);
        $data = array(
            'type'        => CommonMark_Element_BlockElement::LIST_TYPE_ORDERED,
            'start'       => intval($matches[1]),
            'delimiter'   => $matches[2],
            'bullet_char' => null,
        );
    }

    $markerLength = strlen($matches[0]);
    $blankItem = ($markerLength === strlen($rest));

    // With no space, five or more spaces, or an empty item, only one column
    // after the marker counts as padding; otherwise all matched spaces do.
    $singleColumnPadding = $spacesAfterMarker >= 5 || $spacesAfterMarker < 1 || $blankItem;
    $data['padding'] = $singleColumnPadding
        ? $markerLength - $spacesAfterMarker + 1
        : $markerLength;

    return $data;
}
/**
 * Tells whether a list item's marker data belongs to the given list.
 *
 * Both arrays must agree strictly on type, delimiter and bullet_char.
 *
 * @param array $listData Data of the existing list
 * @param array $itemData Data of the candidate list item
 *
 * @return bool
 */
protected function listsMatch($listData, $itemData)
{
    foreach (array('type', 'delimiter', 'bullet_char') as $field) {
        if ($listData[$field] !== $itemData[$field]) {
            return false;
        }
    }

    return true;
}
/**
 * Analyzes one line of input and updates the document tree accordingly.
 *
 * Works in three phases: (1) walk down the currently open blocks and check
 * that the line start still matches each of them, (2) try to open new
 * container blocks at the current offset, (3) add whatever text remains to
 * the appropriate block (or treat it as a lazy paragraph continuation).
 *
 * @param string $ln         The raw line, without its line ending
 * @param int    $lineNumber One-based line number of $ln
 */
protected function incorporateLine($ln, $lineNumber)
{
    $allMatched = true;
    $offset = 0;
    $blank = false;

    $container = $this->doc;
    $oldTip = $this->tip;

    // Convert tabs to spaces:
    $ln = self::detabLine($ln);

    // For each containing block, try to parse the associated line start.
    // Bail out on failure: container will point to the last matching block.
    // Set all_matched to false if not all containers match.
    while ($container->hasChildren()) {
        /** @var CommonMark_Element_BlockElement $lastChild */
        $lastChild = $container->getChildren()->last();
        if (!$lastChild->getIsOpen()) {
            break;
        }

        $container = $lastChild;

        $match = CommonMark_Util_RegexHelper::matchAt('/[^ ]/', $ln, $offset);
        if ($match === null) {
            $firstNonSpace = strlen($ln);
            $blank = true;
        } else {
            $firstNonSpace = $match;
            $blank = false;
        }

        $indent = $firstNonSpace - $offset;

        switch ($container->getType()) {
            case CommonMark_Element_BlockElement::TYPE_BLOCK_QUOTE:
                $matched = $indent <= 3 && isset($ln[$firstNonSpace]) && $ln[$firstNonSpace] === '>';
                if ($matched) {
                    $offset = $firstNonSpace + 1;
                    if (isset($ln[$offset]) && $ln[$offset] === ' ') {
                        $offset++;
                    }
                } else {
                    $allMatched = false;
                }
                break;

            case CommonMark_Element_BlockElement::TYPE_LIST_ITEM:
                $listData = $container->getExtra('list_data');
                $increment = $listData['marker_offset'] + $listData['padding'];
                if ($indent >= $increment) {
                    $offset += $increment;
                } elseif ($blank) {
                    $offset = $firstNonSpace;
                } else {
                    $allMatched = false;
                }
                break;

            case CommonMark_Element_BlockElement::TYPE_INDENTED_CODE:
                if ($indent >= self::CODE_INDENT) {
                    $offset += self::CODE_INDENT;
                } elseif ($blank) {
                    $offset = $firstNonSpace;
                } else {
                    $allMatched = false;
                }
                break;

            case CommonMark_Element_BlockElement::TYPE_ATX_HEADER:
            case CommonMark_Element_BlockElement::TYPE_SETEXT_HEADER:
            case CommonMark_Element_BlockElement::TYPE_HORIZONTAL_RULE:
                // a header can never contain > 1 line, so fail to match:
                $allMatched = false;
                break;

            case CommonMark_Element_BlockElement::TYPE_FENCED_CODE:
                // skip optional spaces of fence offset; the isset() guard stops
                // at the end of the line instead of reading past it
                $i = $container->getExtra('fence_offset');
                while ($i > 0 && isset($ln[$offset]) && $ln[$offset] === ' ') {
                    $offset++;
                    $i--;
                }
                break;

            case CommonMark_Element_BlockElement::TYPE_HTML_BLOCK:
                if ($blank) {
                    $allMatched = false;
                }
                break;

            case CommonMark_Element_BlockElement::TYPE_PARAGRAPH:
                if ($blank) {
                    $container->setIsLastLineBlank(true);
                    $allMatched = false;
                }
                break;

            default:
                // Nothing
        }

        if (!$allMatched) {
            $container = $container->getParent(); // back up to the last matching block
            break;
        }
    }

    $lastMatchedContainer = $container;

    // This function is used to finalize and close any unmatched
    // blocks. We aren't ready to do this now, because we might
    // have a lazy paragraph continuation, in which case we don't
    // want to close unmatched blocks. So we store this closure for
    // use later, when we have more information.
    $closeUnmatchedBlocksAlreadyDone = false;
    $closeUnmatchedBlocks = function (CommonMark_DocParser $self) use (
        $oldTip,
        $lastMatchedContainer,
        $lineNumber,
        &$closeUnmatchedBlocksAlreadyDone
    ) {
        // finalize any blocks not matched; blocks are compared by identity,
        // a loose `!=` would compare the object graphs property by property
        while (!$closeUnmatchedBlocksAlreadyDone && $oldTip !== $lastMatchedContainer && $oldTip !== null) {
            $self->finalize($oldTip, $lineNumber);
            $oldTip = $oldTip->getParent();
        }
        $closeUnmatchedBlocksAlreadyDone = true;
    };

    // Check to see if we've hit 2nd blank line; if so break out of list:
    if ($blank && $container->getIsLastLineBlank()) {
        $this->breakOutOfLists($container, $lineNumber);
    }

    // Unless last matched container is a code block, try new container starts,
    // adding children to the last matched container:
    while ($container->getType() != CommonMark_Element_BlockElement::TYPE_FENCED_CODE &&
        $container->getType() != CommonMark_Element_BlockElement::TYPE_INDENTED_CODE &&
        $container->getType() != CommonMark_Element_BlockElement::TYPE_HTML_BLOCK &&
        // this is a little performance optimization
        CommonMark_Util_RegexHelper::matchAt('/^[ #`~*+_=<>0-9-]/', $ln, $offset) !== null
    ) {
        $match = CommonMark_Util_RegexHelper::matchAt('/[^ ]/', $ln, $offset);
        if ($match === null) {
            $firstNonSpace = strlen($ln);
            $blank = true;
        } else {
            $firstNonSpace = $match;
            $blank = false;
        }

        $indent = $firstNonSpace - $offset;

        if ($indent >= self::CODE_INDENT) {
            // indented code
            if ($this->tip->getType() != CommonMark_Element_BlockElement::TYPE_PARAGRAPH && !$blank) {
                $offset += self::CODE_INDENT;
                $closeUnmatchedBlocks($this);
                $container = $this->addChild(CommonMark_Element_BlockElement::TYPE_INDENTED_CODE, $lineNumber, $offset);
            } else { // indent >= 4 in a lazy paragraph continuation (or a blank line)
                break;
            }
        } elseif (!$blank && $ln[$firstNonSpace] === '>') {
            // blockquote
            $offset = $firstNonSpace + 1;
            // optional following space
            if (isset($ln[$offset]) && $ln[$offset] === ' ') {
                $offset++;
            }
            $closeUnmatchedBlocks($this);
            $container = $this->addChild(CommonMark_Element_BlockElement::TYPE_BLOCK_QUOTE, $lineNumber, $offset);
        } elseif ($match = CommonMark_Util_RegexHelper::matchAll('/^#{1,6}(?: +|$)/', $ln, $firstNonSpace)) {
            // ATX header
            $offset = $firstNonSpace + strlen($match[0]);
            $closeUnmatchedBlocks($this);
            $container = $this->addChild(CommonMark_Element_BlockElement::TYPE_ATX_HEADER, $lineNumber, $firstNonSpace);
            $container->setExtra('level', strlen(trim($match[0]))); // number of #s
            // remove trailing ###s
            $container->getStrings()->add(
                preg_replace(
                    '/(?:(\\\\#) *#*| *#+) *$/',
                    '$1',
                    substr($ln, $offset)
                )
            );
            break;
        } elseif ($match = CommonMark_Util_RegexHelper::matchAll('/^`{3,}(?!.*`)|^~{3,}(?!.*~)/', $ln, $firstNonSpace)) {
            // fenced code block
            $fenceLength = strlen($match[0]);
            $closeUnmatchedBlocks($this);
            $container = $this->addChild(CommonMark_Element_BlockElement::TYPE_FENCED_CODE, $lineNumber, $firstNonSpace);
            $container->setExtra('fence_length', $fenceLength);
            $container->setExtra('fence_char', $match[0][0]);
            $container->setExtra('fence_offset', $firstNonSpace - $offset);
            $offset = $firstNonSpace + $fenceLength;
            break;
        } elseif (CommonMark_Util_RegexHelper::matchAt(
                CommonMark_Util_RegexHelper::getInstance()->getHtmlBlockOpenRegex(),
                $ln,
                $firstNonSpace
            ) !== null
        ) {
            // html block
            $closeUnmatchedBlocks($this);
            $container = $this->addChild(CommonMark_Element_BlockElement::TYPE_HTML_BLOCK, $lineNumber, $firstNonSpace);
            // note, we don't adjust offset because the tag is part of the text
            break;
        } elseif ($container->getType() === CommonMark_Element_BlockElement::TYPE_PARAGRAPH &&
            $container->getStrings()->count() === 1 &&
            ($match = CommonMark_Util_RegexHelper::matchAll('/^(?:=+|-+) *$/', $ln, $firstNonSpace))
        ) {
            // setext header line
            $closeUnmatchedBlocks($this);
            $container->setType(CommonMark_Element_BlockElement::TYPE_SETEXT_HEADER);
            $container->setExtra('level', $match[0][0] === '=' ? 1 : 2);
            $offset = strlen($ln);
        } elseif (CommonMark_Util_RegexHelper::matchAt(CommonMark_Util_RegexHelper::getInstance()->getHRuleRegex(), $ln, $firstNonSpace) !== null) {
            // hrule
            $closeUnmatchedBlocks($this);
            $container = $this->addChild(CommonMark_Element_BlockElement::TYPE_HORIZONTAL_RULE, $lineNumber, $firstNonSpace);
            $offset = strlen($ln) - 1;
            break;
        } elseif (($data = $this->parseListMarker($ln, $firstNonSpace))) {
            // list item
            $closeUnmatchedBlocks($this);
            $data['marker_offset'] = $indent;
            $offset = $firstNonSpace + $data['padding'];

            // add the list if needed
            if ($container->getType() !== CommonMark_Element_BlockElement::TYPE_LIST ||
                !($this->listsMatch($container->getExtra('list_data'), $data))
            ) {
                $container = $this->addChild(CommonMark_Element_BlockElement::TYPE_LIST, $lineNumber, $firstNonSpace);
                $container->setExtra('list_data', $data);
            }

            // add the list item
            $container = $this->addChild(CommonMark_Element_BlockElement::TYPE_LIST_ITEM, $lineNumber, $firstNonSpace);
            $container->setExtra('list_data', ($data));
        } else {
            break;
        }

        if ($container->acceptsLines()) {
            // if it's a line container, it can't contain other containers
            break;
        }
    }

    // What remains at the offset is a text line. Add the text to the appropriate container.
    $match = CommonMark_Util_RegexHelper::matchAt('/[^ ]/', $ln, $offset);
    if ($match === null) {
        $firstNonSpace = strlen($ln);
        $blank = true;
    } else {
        $firstNonSpace = $match;
        $blank = false;
    }

    $indent = $firstNonSpace - $offset;

    // First check for a lazy paragraph continuation:
    if ($this->tip !== $lastMatchedContainer &&
        !$blank &&
        $this->tip->getType() == CommonMark_Element_BlockElement::TYPE_PARAGRAPH &&
        $this->tip->getStrings()->count() > 0
    ) {
        // lazy paragraph continuation
        // NOTE(review): this class declares no lastLineBlank property, so this
        // assignment creates a dynamic property on the parser - confirm it is needed.
        $this->lastLineBlank = false; // TODO: really? (see line 1152)
        $this->addLine($ln, $offset);
    } else { // not a lazy continuation
        //finalize any blocks not matched
        $closeUnmatchedBlocks($this);

        // Block quote lines are never blank as they start with >
        // and we don't count blanks in fenced code for purposes of tight/loose
        // lists or breaking out of lists. We also don't set last_line_blank
        // on an empty list item.
        $container->setIsLastLineBlank(
            $blank &&
            !(
                $container->getType() == CommonMark_Element_BlockElement::TYPE_BLOCK_QUOTE ||
                $container->getType() == CommonMark_Element_BlockElement::TYPE_FENCED_CODE ||
                ($container->getType() == CommonMark_Element_BlockElement::TYPE_LIST_ITEM &&
                    $container->getChildren()->count() === 0 &&
                    $container->getStartLine() == $lineNumber
                )
            )
        );

        // Ancestors of the container did not end on a blank line
        $cont = $container;
        while ($cont->getParent()) {
            $cont->getParent()->setIsLastLineBlank(false);
            $cont = $cont->getParent();
        }

        switch ($container->getType()) {
            case CommonMark_Element_BlockElement::TYPE_INDENTED_CODE:
            case CommonMark_Element_BlockElement::TYPE_HTML_BLOCK:
                $this->addLine($ln, $offset);
                break;

            case CommonMark_Element_BlockElement::TYPE_FENCED_CODE:
                // check for closing code fence
                $test = ($indent <= 3 &&
                    isset($ln[$firstNonSpace]) &&
                    $ln[$firstNonSpace] == $container->getExtra('fence_char') &&
                    $match = CommonMark_Util_RegexHelper::matchAll('/^(?:`{3,}|~{3,})(?= *$)/', $ln, $firstNonSpace)
                );
                if ($test && strlen($match[0]) >= $container->getExtra('fence_length')) {
                    // don't add closing fence to container; instead, close it:
                    $this->finalize($container, $lineNumber);
                } else {
                    $this->addLine($ln, $offset);
                }
                break;

            case CommonMark_Element_BlockElement::TYPE_ATX_HEADER:
            case CommonMark_Element_BlockElement::TYPE_SETEXT_HEADER:
            case CommonMark_Element_BlockElement::TYPE_HORIZONTAL_RULE:
                // nothing to do; we already added the contents.
                break;

            default:
                if ($container->acceptsLines()) {
                    $this->addLine($ln, $firstNonSpace);
                } elseif ($blank) {
                    // do nothing
                } elseif ($container->getType() != CommonMark_Element_BlockElement::TYPE_HORIZONTAL_RULE &&
                    $container->getType() != CommonMark_Element_BlockElement::TYPE_SETEXT_HEADER
                ) {
                    // create paragraph container for line
                    $container = $this->addChild(CommonMark_Element_BlockElement::TYPE_PARAGRAPH, $lineNumber, $firstNonSpace);
                    $this->addLine($ln, $firstNonSpace);
                } else {
                    // TODO: throw exception?
                }
        }
    }
}
/**
 * Finalizes a block and moves the tip up to that block's parent.
 *
 * @param CommonMark_Element_BlockElement $block      The block to close
 * @param int                             $lineNumber Line number handed to the block's finalize()
 */
public function finalize(CommonMark_Element_BlockElement $block, $lineNumber)
{
    $block->finalize($lineNumber, $this->inlineParser, $this->refMap);

    $parent = $block->getParent();
    $this->tip = $parent; // typo on 1310?
}
/**
 * The main parsing function: builds and returns the document AST for $input.
 *
 * Resets the parser state, feeds the input line by line to
 * incorporateLine(), finalizes every block still open and finally lets the
 * document process its inline content.
 *
 * @param string $input Markdown source text
 *
 * @return CommonMark_Element_BlockElement The document root block
 *
 * @api
 */
public function parse($input)
{
    $document = new CommonMark_Element_BlockElement(CommonMark_Element_BlockElement::TYPE_DOCUMENT, 1, 1);
    $this->doc = $document;
    $this->tip = $document;
    $this->inlineParser = new CommonMark_InlineParser();
    $this->refMap = new CommonMark_Reference_ReferenceMap();

    // Remove a single "\n" which appears at the very end of the string
    $hasTrailingNewline = (substr($input, -1) == "\n");
    if ($hasTrailingNewline) {
        $input = substr($input, 0, -1);
    }

    $lines = preg_split('/\r\n|\n|\r/', $input);
    $len = count($lines);

    $index = 0;
    while ($index < $len) {
        // line numbers are one-based
        $this->incorporateLine($lines[$index], $index + 1);
        $index++;
    }

    // finalize() moves the tip to its parent, so this closes every open block
    while ($this->tip) {
        $this->finalize($this->tip, $len - 1);
    }

    $this->doc->processInlines($this->inlineParser, $this->refMap);

    return $this->doc;
}
}

View File

@ -0,0 +1,478 @@
<?php
/*
* This file is part of the commonmark-php package.
*
* (c) Colin O'Dell <colinodell@gmail.com>
*
* Original code based on stmd.js
* - (c) John MacFarlane
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
* Block-level element
*/
class CommonMark_Element_BlockElement
{
const TYPE_ATX_HEADER = 'ATXHeader';
const TYPE_BLOCK_QUOTE = 'BlockQuote';
const TYPE_DOCUMENT = 'Document';
const TYPE_FENCED_CODE = 'FencedCode';
const TYPE_HORIZONTAL_RULE = 'HorizontalRule';
const TYPE_HTML_BLOCK = 'HtmlBlock';
const TYPE_INDENTED_CODE = 'IndentedCode';
const TYPE_LIST = 'List';
const TYPE_LIST_ITEM = 'ListItem';
const TYPE_PARAGRAPH = 'Paragraph';
const TYPE_REFERENCE_DEF = 'ReferenceDef';
const TYPE_SETEXT_HEADER = 'SetextHeader';
const LIST_TYPE_ORDERED = 'Outline';
const LIST_TYPE_UNORDERED = 'Bullet';
/**
* @var string
*/
protected $type;
/**
* @var bool
*/
protected $open = true;
/**
* @var bool
*/
protected $lastLineBlank = false;
/**
* @var int
*/
protected $startLine;
/**
* @var int
*/
protected $startColumn;
/**
* @var int
*/
protected $endLine;
/**
* @var ArrayCollection|BlockElement[]
*/
protected $children;
/**
* @var BlockElement|null
*/
protected $parent = null;
/**
* This is formed by concatenating strings, in finalize:
* @var string
*/
protected $stringContent = '';
/**
* @var string[]
*/
protected $strings;
/**
* @var ArrayCollection|InlineElementInterface[]
*/
protected $inlineContent;
/**
* Extra data storage
* @var array
*/
protected $extras = array();
/**
 * Constructor
 *
 * Creates an open block whose end line initially equals its start line and
 * whose children, strings and inline content collections are empty.
 *
 * @param string $type        Block type (see TYPE_ constants)
 * @param int    $startLine   Line where the block element starts
 * @param int    $startColumn Column where the block element starts
 */
public function __construct($type, $startLine, $startColumn)
{
    $this->type = $type;

    // Position: a fresh block ends on the line it starts on
    $this->startColumn = $startColumn;
    $this->startLine = $startLine;
    $this->endLine = $startLine;

    // Collections
    $this->inlineContent = new CommonMark_Util_ArrayCollection();
    $this->strings = new CommonMark_Util_ArrayCollection();
    $this->children = new CommonMark_Util_ArrayCollection();
}
/**
 * Returns true if this block may have a child block of the given type.
 *
 * Documents, block quotes and list items accept any child type; a list
 * accepts list items only; every other block type accepts no children.
 *
 * @param mixed $childType The type of child block to add (see TYPE_ constants)
 *
 * @return bool
 */
public function canContain($childType)
{
    switch ($this->type) {
        case self::TYPE_DOCUMENT:
        case self::TYPE_BLOCK_QUOTE:
        case self::TYPE_LIST_ITEM:
            return true;

        case self::TYPE_LIST:
            return $childType == self::TYPE_LIST_ITEM;

        default:
            return false;
    }
}
/**
 * Returns true if this block's type can accept lines of text, i.e. it is a
 * paragraph, an indented code block or a fenced code block.
 *
 * @return bool
 */
public function acceptsLines()
{
    $lineTypes = array(
        self::TYPE_PARAGRAPH,
        self::TYPE_INDENTED_CODE,
        self::TYPE_FENCED_CODE,
    );

    // in_array() compares loosely by default, like the == operator
    return in_array($this->type, $lineTypes);
}
/**
 * Whether the block ends with a blank line.
 *
 * A list or list item that is not itself flagged defers to its last child.
 *
 * @return bool
 */
public function endsWithBlankLine()
{
    if ($this->lastLineBlank) {
        return true;
    }

    $isListOrItem = ($this->type == self::TYPE_LIST || $this->type == self::TYPE_LIST_ITEM);
    if (!$isListOrItem || !$this->hasChildren()) {
        return false;
    }

    return $this->getChildren()->last()->endsWithBlankLine();
}
/**
* Returns the collection holding this block's child blocks.
*
* @return CommonMark_Util_ArrayCollection|CommonMark_Element_BlockElement[]
*/
public function getChildren()
{
return $this->children;
}
/**
* Whether the children collection is non-empty.
*
* @return bool
*/
public function hasChildren()
{
return !$this->children->isEmpty();
}
/**
* Returns the block type (see TYPE_ constants).
*
* @return mixed
*/
public function getType()
{
return $this->type;
}
/**
* Changes the block type (see TYPE_ constants).
*
* @param mixed $type
*
* @return $this
*/
public function setType($type)
{
$this->type = $type;
return $this;
}
/**
 * Returns the parent block, or null when no parent has been set.
 *
 * @return CommonMark_Element_BlockElement|null
 */
public function getParent()
{
    if (!$this->parent) {
        return null;
    }

    return $this->parent;
}
/**
* Whether the block is open for modifications
*
* Blocks start out open (the property defaults to true).
*
* @return bool
*/
public function getIsOpen()
{
return $this->open;
}
/**
* Returns the collection of text lines stored for this block.
*
* @return CommonMark_Util_ArrayCollection|string[]
*/
public function getStrings()
{
return $this->strings;
}
/**
* Sets the parent block of this block.
*
* Only the parent reference is stored; this block is not added to the
* parent's children collection here.
*
* @param CommonMark_Element_BlockElement $parent
*
* @return $this
*/
public function setParent(CommonMark_Element_BlockElement $parent)
{
$this->parent = $parent;
return $this;
}
/**
* Returns the last-line-blank flag of this block (false by default).
*
* @return bool
*/
public function getIsLastLineBlank()
{
return $this->lastLineBlank;
}
/**
 * Sets the lastLineBlank flag of this block.
 *
 * @param bool $value
 *
 * @return $this Fluent interface
 */
public function setIsLastLineBlank($value)
{
$this->lastLineBlank = $value;
return $this;
}
/**
 * Sets the open flag of this block.
 *
 * Note that finalize() returns immediately for a block whose flag is false.
 *
 * @param bool $value
 *
 * @return $this Fluent interface
 */
public function setIsOpen($value)
{
$this->open = $value;
return $this;
}
/**
 * Returns the line number recorded as the start of this block.
 *
 * @return int
 */
public function getStartLine()
{
return $this->startLine;
}
/**
 * Sets the line number recorded as the end of this block.
 *
 * finalize() also assigns this value when it closes the block.
 *
 * @param int $lineNumber
 *
 * @return $this Fluent interface
 */
public function setEndLine($lineNumber)
{
$this->endLine = $lineNumber;
return $this;
}
/**
 * Returns the parsed inline elements of this block.
 *
 * The value is replaced by processInlines() for paragraphs and headers.
 *
 * @return CommonMark_Util_ArrayCollection|CommonMark_Element_InlineElementInterface[]
 */
public function getInlineContent()
{
return $this->inlineContent;
}
/**
 * Looks up an extra (free-form) value stored on this block.
 *
 * @param string $key
 *
 * @return mixed|null The stored value; null when the key is unset or holds null
 */
public function getExtra($key)
{
    if (!isset($this->extras[$key])) {
        return null;
    }

    return $this->extras[$key];
}
/**
 * Stores an extra (free-form) value on this block under the given key.
 *
 * finalize() uses this for the 'info' string of fenced code and the 'tight'
 * flag of lists.
 *
 * @param string $key
 * @param mixed $value
 *
 * @return $this Fluent interface
 */
public function setExtra($key, $value)
{
$this->extras[$key] = $value;
return $this;
}
/**
 * Returns the block's text content as assembled by finalize().
 *
 * processInlines() resets it to an empty string for paragraphs and headers.
 *
 * @return string
 */
public function getStringContent()
{
return $this->stringContent;
}
/**
 * Tells whether the string content is empty or made of whitespace only.
 *
 * @return bool
 */
private function isStringContentBlank()
{
    $matched = preg_match('/^\s*$/', $this->stringContent);

    return 1 === $matched;
}
/**
 * Finalize the block; mark it closed for modification.
 *
 * Does nothing when the block is already closed. Otherwise the end line is
 * recorded and type-specific post-processing takes place:
 *  - paragraph: lines are joined, leading spaces stripped, and leading link
 *    reference definitions are consumed through the inline parser; when
 *    nothing but whitespace remains the block becomes a reference definition;
 *  - headers and HTML blocks: lines are joined with newlines;
 *  - indented code: trailing blank lines are removed and the content is
 *    terminated with a newline;
 *  - fenced code: the first line becomes the 'info' extra, the remaining
 *    lines the content;
 *  - list: the 'tight' extra is computed.
 *
 * @param int                               $lineNumber   Number of the line being processed when the block is closed
 * @param CommonMark_InlineParser           $inlineParser Parser used to recognise link reference definitions
 * @param CommonMark_Reference_ReferenceMap $refMap       Map handed to the parser to receive the references found
 */
public function finalize($lineNumber, CommonMark_InlineParser $inlineParser, CommonMark_Reference_ReferenceMap $refMap)
{
    if (!$this->open) {
        return;
    }
    $this->open = false;
    if ($lineNumber > $this->startLine) {
        $this->endLine = $lineNumber - 1;
    } else {
        $this->endLine = $lineNumber;
    }
    switch ($this->getType()) {
        case self::TYPE_PARAGRAPH:
            $this->stringContent = preg_replace(
                '/^ */m',
                '',
                implode("\n", $this->strings->toArray())
            );
            // Try parsing the beginning as link reference definitions.
            // isset() guards the offset read so empty content raises no
            // "uninitialized string offset" notice/warning.
            while (isset($this->stringContent[0]) && $this->stringContent[0] === '[' &&
                ($pos = $inlineParser->parseReference($this->stringContent, $refMap))
            ) {
                // substr() yields false on older PHP versions when nothing is
                // left; the cast keeps the property a string.
                $this->stringContent = (string) substr($this->stringContent, $pos);
                if ($this->isStringContentBlank()) {
                    $this->type = self::TYPE_REFERENCE_DEF;
                    break;
                }
            }
            break;
        case self::TYPE_ATX_HEADER:
        case self::TYPE_SETEXT_HEADER:
        case self::TYPE_HTML_BLOCK:
            $this->stringContent = implode("\n", $this->strings->toArray());
            break;
        case self::TYPE_INDENTED_CODE:
            // Walk the lines backwards and drop the trailing blank ones.
            $reversed = array_reverse($this->strings->toArray(), true);
            foreach ($reversed as $index => $line) {
                if ($line == '' || $line === "\n" || preg_match('/^(\n *)$/', $line)) {
                    unset($reversed[$index]);
                } else {
                    break;
                }
            }
            $fixed = array_reverse($reversed);
            $tmp = implode("\n", $fixed);
            if (substr($tmp, -1) !== "\n") {
                $tmp .= "\n";
            }
            $this->stringContent = $tmp;
            break;
        case self::TYPE_FENCED_CODE:
            // first line becomes info string
            $this->setExtra('info', CommonMark_Util_RegexHelper::unescape(trim($this->strings->first())));
            if ($this->strings->count() == 1) {
                $this->stringContent = '';
            } else {
                $this->stringContent = implode("\n", $this->strings->slice(1)) . "\n";
            }
            break;
        case self::TYPE_LIST:
            $this->setExtra('tight', true); // tight by default
            $numItems = $this->children->count();
            for ($i = 0; $i < $numItems; $i++) {
                /** @var CommonMark_Element_BlockElement $item */
                $item = $this->children->get($i);
                // check for non-final list item ending with blank line:
                $lastItem = $i == $numItems - 1;
                if ($item->endsWithBlankLine() && !$lastItem) {
                    $this->setExtra('tight', false);
                    break;
                }
                // Recurse into children of list item, to see if there are
                // spaces between any of them:
                $numSubItems = $item->getChildren()->count();
                for ($j = 0; $j < $numSubItems; $j++) {
                    $subItem = $item->getChildren()->get($j);
                    $lastSubItem = $j == $numSubItems - 1;
                    if ($subItem->endsWithBlankLine() && !($lastItem && $lastSubItem)) {
                        $this->setExtra('tight', false);
                        // The list is known to be loose: leave both loops.
                        break 2;
                    }
                }
            }
            break;
        default:
            break;
    }
}
/**
 * Parses the inline content of this block and, recursively, of its children.
 *
 * For paragraphs and both header kinds the trimmed string content is handed
 * to the inline parser, the result stored as inline content and the string
 * content cleared. Other block types are only traversed.
 *
 * @param CommonMark_InlineParser           $inlineParser
 * @param CommonMark_Reference_ReferenceMap $refMap
 */
public function processInlines(CommonMark_InlineParser $inlineParser, CommonMark_Reference_ReferenceMap $refMap)
{
    $inlineBearingTypes = array(
        self::TYPE_PARAGRAPH,
        self::TYPE_SETEXT_HEADER,
        self::TYPE_ATX_HEADER,
    );

    // Non-strict in_array() mirrors the loose comparison of a switch.
    if (in_array($this->getType(), $inlineBearingTypes)) {
        $this->inlineContent = $inlineParser->parse(trim($this->stringContent), $refMap);
        $this->stringContent = '';
    }

    if (!$this->hasChildren()) {
        return;
    }

    foreach ($this->getChildren() as $childBlock) {
        $childBlock->processInlines($inlineParser, $refMap);
    }
}
}

View File

@ -0,0 +1,125 @@
<?php
/*
* This file is part of the commonmark-php package.
*
* (c) Colin O'Dell <colinodell@gmail.com>
*
* Original code based on stmd.js
* - (c) John MacFarlane
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
 * Provides static methods to simplify and standardize the creation of inline elements
 *
 * Every factory returns a new CommonMark_Element_InlineElement; content is
 * stored under the 'c' attribute.
 */
class CommonMark_Element_InlineCreator
{
    /**
     * @param string $code Text of the code span
     *
     * @return CommonMark_Element_InlineElement
     */
    public static function createCode($code)
    {
        return new CommonMark_Element_InlineElement(CommonMark_Element_InlineElement::TYPE_CODE, array('c' => $code));
    }

    /**
     * @param mixed $contents Emphasised content; stored as given (the inline
     *                        parser passes an array of inline elements)
     *
     * @return CommonMark_Element_InlineElement
     */
    public static function createEmph($contents)
    {
        return new CommonMark_Element_InlineElement(CommonMark_Element_InlineElement::TYPE_EMPH, array('c' => $contents));
    }

    /**
     * @param string $contents The entity text
     *
     * @return CommonMark_Element_InlineElement
     */
    public static function createEntity($contents)
    {
        return new CommonMark_Element_InlineElement(CommonMark_Element_InlineElement::TYPE_ENTITY, array('c' => $contents));
    }

    /**
     * @return CommonMark_Element_InlineElement
     */
    public static function createHardbreak()
    {
        return new CommonMark_Element_InlineElement(CommonMark_Element_InlineElement::TYPE_HARDBREAK);
    }

    /**
     * @param string $html Raw HTML
     *
     * @return CommonMark_Element_InlineElement
     */
    public static function createHtml($html)
    {
        return new CommonMark_Element_InlineElement(CommonMark_Element_InlineElement::TYPE_HTML, array('c' => $html));
    }

    /**
     * Creates a link element.
     *
     * The label is normalised as follows: a string becomes a one-element array
     * holding a string inline; a collection is converted to an array; an empty
     * value falls back to the destination text; anything else is stored as is.
     *
     * @param string                                             $destination
     * @param string|CommonMark_Util_ArrayCollection|array|null  $label
     * @param string|null                                        $title
     *
     * @return CommonMark_Element_InlineElement
     */
    public static function createLink($destination, $label = null, $title = null)
    {
        $attr = array('destination' => $destination);
        if (is_string($label)) {
            $attr['label'] = array(self::createString($label));
        } elseif ($label instanceof CommonMark_Util_ArrayCollection) {
            $attr['label'] = $label->toArray();
        } elseif (empty($label)) {
            $attr['label'] = array(self::createString($destination));
        } else {
            $attr['label'] = $label;
        }
        // "0" is a legitimate title although PHP treats it as falsy.
        if ($title || $title === '0') {
            $attr['title'] = $title;
        }
        return new CommonMark_Element_InlineElement(CommonMark_Element_InlineElement::TYPE_LINK, $attr);
    }

    /**
     * @return CommonMark_Element_InlineElement
     */
    public static function createSoftbreak()
    {
        return new CommonMark_Element_InlineElement(CommonMark_Element_InlineElement::TYPE_SOFTBREAK);
    }

    /**
     * @param string $contents Literal text
     *
     * @return CommonMark_Element_InlineElement
     */
    public static function createString($contents)
    {
        return new CommonMark_Element_InlineElement(CommonMark_Element_InlineElement::TYPE_STRING, array('c' => $contents));
    }

    /**
     * @param mixed $contents Strongly emphasised content; stored as given (the
     *                        inline parser passes an array of inline elements)
     *
     * @return CommonMark_Element_InlineElement
     */
    public static function createStrong($contents)
    {
        return new CommonMark_Element_InlineElement(CommonMark_Element_InlineElement::TYPE_STRONG, array('c' => $contents));
    }
}

View File

@ -0,0 +1,114 @@
<?php
/*
* This file is part of the commonmark-php package.
*
* (c) Colin O'Dell <colinodell@gmail.com>
*
* Original code based on stmd.js
* - (c) John MacFarlane
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
 * Inline element: a typed node carrying a free-form attribute map.
 *
 * The node's content lives under the 'c' attribute.
 */
class CommonMark_Element_InlineElement implements CommonMark_Element_InlineElementInterface
{
    const TYPE_CODE = 'Code';
    const TYPE_EMPH = 'Emph';
    const TYPE_ENTITY = 'Entity';
    const TYPE_HARDBREAK = 'Hardbreak';
    const TYPE_HTML = 'Html';
    const TYPE_IMAGE = 'Image';
    const TYPE_LINK = 'Link';
    const TYPE_SOFTBREAK = 'Softbreak';
    const TYPE_STRING = 'Str';
    const TYPE_STRONG = 'Strong';

    /**
     * @var mixed One of the TYPE_ constants
     */
    protected $type;

    /**
     * @var array Attribute values keyed by attribute name
     */
    protected $attributes;

    /**
     * @param mixed $type       One of the TYPE_ constants
     * @param array $attributes Initial attribute map
     */
    public function __construct($type, array $attributes = array())
    {
        $this->attributes = $attributes;
        $this->type = $type;
    }

    /**
     * @return mixed The element type
     */
    public function getType()
    {
        return $this->type;
    }

    /**
     * @param string $type New element type
     *
     * @return $this
     */
    public function setType($type)
    {
        $this->type = $type;

        return $this;
    }

    /**
     * @return mixed|null Value of the 'c' attribute, null when absent
     */
    public function getContents()
    {
        return $this->getAttribute('c');
    }

    /**
     * @param mixed $contents New value of the 'c' attribute
     *
     * @return $this
     */
    public function setContents($contents)
    {
        $this->setAttribute('c', $contents);

        return $this;
    }

    /**
     * @param string $attrName
     *
     * @return mixed|null The attribute value; null when unset or null
     */
    public function getAttribute($attrName)
    {
        if (isset($this->attributes[$attrName])) {
            return $this->attributes[$attrName];
        }

        return null;
    }

    /**
     * @param string $attrName
     * @param mixed  $value
     *
     * @return $this
     */
    public function setAttribute($attrName, $value)
    {
        $this->attributes[$attrName] = $value;

        return $this;
    }
}

View File

@ -0,0 +1,56 @@
<?php
/*
* This file is part of the commonmark-php package.
*
* (c) Colin O'Dell <colinodell@gmail.com>
*
* Original code based on stmd.js
* - (c) John MacFarlane
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
 * Contract for inline elements: a type, a content value and named attributes.
 */
interface CommonMark_Element_InlineElementInterface
{
/**
 * Returns the element type.
 *
 * @return string
 */
public function getType();
/**
 * Replaces the element type.
 *
 * @param string $type
 *
 * @return $this
 */
public function setType($type);
/**
 * Returns the element's content.
 *
 * @return mixed
 */
public function getContents();
/**
 * Replaces the element's content.
 *
 * @param mixed $contents
 *
 * @return $this
 */
public function setContents($contents);
/**
 * Returns the value of a named attribute.
 *
 * @param string $attrName
 *
 * @return mixed
 */
public function getAttribute($attrName);
/**
 * Sets the value of a named attribute.
 *
 * @param string $attrName
 * @param mixed $value
 *
 * @return $this
 */
public function setAttribute($attrName, $value);
}

View File

@ -0,0 +1,261 @@
<?php
/*
* This file is part of the commonmark-php package.
*
* (c) Colin O'Dell <colinodell@gmail.com>
*
* Original code based on stmd.js
* - (c) John MacFarlane
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
 * Renders a parsed AST to HTML
 */
class CommonMark_HtmlRenderer
{
    /** @var string Separator placed between sibling blocks */
    protected $blockSeparator = "\n";

    /** @var string Separator placed just inside container tags (blockquote, lists) */
    protected $innerSeparator = "\n";

    /** @var string Output used for a soft line break */
    protected $softBreak = "\n";

    /**
     * Escapes the HTML special characters &, <, > and ".
     *
     * @param string $string
     * @param bool   $preserveEntities When true, ampersands that already start
     *                                 an entity reference are left untouched
     *
     * @return string
     */
    protected function escape($string, $preserveEntities = false)
    {
        if ($preserveEntities) {
            $string = preg_replace('/[&](?![#](x[a-f0-9]{1,8}|[0-9]{1,8});|[a-z][a-z0-9]{1,31};)/i', '&amp;', $string);
        } else {
            $string = str_replace('&', '&amp;', $string);
        }

        // None of the replacements contains a character searched for later,
        // so one sequential str_replace() pass is safe.
        return str_replace(
            array('<', '>', '"'),
            array('&lt;', '&gt;', '&quot;'),
            $string
        );
    }

    /**
     * Helper function to produce content in a pair of HTML tags.
     *
     * Attribute values are emitted verbatim; callers escape them beforehand.
     *
     * @param string      $tag
     * @param array       $attribs     Attribute values keyed by attribute name
     * @param string|null $contents    Inner HTML; null or '' means "no content"
     * @param bool        $selfClosing Emit "<tag />" when there is no content
     *
     * @return string
     */
    protected function inTags($tag, $attribs = array(), $contents = null, $selfClosing = false)
    {
        $result = '<' . $tag;
        foreach ($attribs as $key => $value) {
            $result .= ' ' . $key . '="' . $value . '"';
        }
        // Explicit emptiness test: the string "0" is real content even though
        // PHP treats it as falsy.
        $hasContents = $contents !== null && $contents !== false && $contents !== '';
        if ($hasContents) {
            $result .= '>' . $contents . '</' . $tag . '>';
        } elseif ($selfClosing) {
            $result .= ' />';
        } else {
            $result .= '></' . $tag . '>';
        }
        return $result;
    }

    /**
     * Renders a single inline element.
     *
     * @param CommonMark_Element_InlineElementInterface $inline
     *
     * @return mixed|string
     *
     * @throws InvalidArgumentException When the inline type is unknown
     */
    public function renderInline(CommonMark_Element_InlineElementInterface $inline)
    {
        $attrs = array();
        switch ($inline->getType()) {
            case CommonMark_Element_InlineElement::TYPE_STRING:
                return $this->escape($inline->getContents());
            case CommonMark_Element_InlineElement::TYPE_SOFTBREAK:
                return $this->softBreak;
            case CommonMark_Element_InlineElement::TYPE_HARDBREAK:
                return $this->inTags('br', array(), '', true) . "\n";
            case CommonMark_Element_InlineElement::TYPE_EMPH:
                return $this->inTags('em', array(), $this->renderInlines($inline->getContents()));
            case CommonMark_Element_InlineElement::TYPE_STRONG:
                return $this->inTags('strong', array(), $this->renderInlines($inline->getContents()));
            case CommonMark_Element_InlineElement::TYPE_HTML:
                // Raw HTML and entities are passed through unescaped.
                return $inline->getContents();
            case CommonMark_Element_InlineElement::TYPE_ENTITY:
                return $inline->getContents();
            case CommonMark_Element_InlineElement::TYPE_LINK:
                $attrs['href'] = $this->escape($inline->getAttribute('destination'), true);
                $title = $inline->getAttribute('title');
                // Keep a title of "0", which a plain truthiness test would drop.
                if ($title || $title === '0') {
                    $attrs['title'] = $this->escape($title, true);
                }
                return $this->inTags('a', $attrs, $this->renderInlines($inline->getAttribute('label')));
            case CommonMark_Element_InlineElement::TYPE_IMAGE:
                $attrs['src'] = $this->escape($inline->getAttribute('destination'), true);
                $attrs['alt'] = $this->escape($this->renderInlines($inline->getAttribute('label')));
                $title = $inline->getAttribute('title');
                if ($title || $title === '0') {
                    $attrs['title'] = $this->escape($title, true);
                }
                return $this->inTags('img', $attrs, '', true);
            case CommonMark_Element_InlineElement::TYPE_CODE:
                return $this->inTags('code', array(), $this->escape($inline->getContents()));
            default:
                throw new InvalidArgumentException('Unknown inline type: ' . $inline->getType());
        }
    }

    /**
     * Renders a sequence of inline elements and concatenates the results.
     *
     * @param CommonMark_Element_InlineElementInterface[] $inlines Any iterable of inline elements
     *
     * @return string
     */
    public function renderInlines($inlines)
    {
        $result = array();
        foreach ($inlines as $inline) {
            $result[] = $this->renderInline($inline);
        }
        return implode('', $result);
    }

    /**
     * Renders a single block, recursing into its children where applicable.
     *
     * @param CommonMark_Element_BlockElement $block
     * @param bool                            $inTightList When true, paragraphs are rendered without <p> tags
     *
     * @return string
     *
     * @throws RuntimeException When the block type is unknown
     */
    public function renderBlock(CommonMark_Element_BlockElement $block, $inTightList = false)
    {
        switch ($block->getType()) {
            case CommonMark_Element_BlockElement::TYPE_DOCUMENT:
                $wholeDoc = $this->renderBlocks($block->getChildren());
                return $wholeDoc === '' ? '' : $wholeDoc . "\n";
            case CommonMark_Element_BlockElement::TYPE_PARAGRAPH:
                if ($inTightList) {
                    return $this->renderInlines($block->getInlineContent());
                }
                return $this->inTags('p', array(), $this->renderInlines($block->getInlineContent()));
            case CommonMark_Element_BlockElement::TYPE_BLOCK_QUOTE:
                $filling = $this->renderBlocks($block->getChildren());
                if ($filling === '') {
                    return $this->inTags('blockquote', array(), $this->innerSeparator);
                }
                return $this->inTags(
                    'blockquote',
                    array(),
                    $this->innerSeparator . $filling . $this->innerSeparator
                );
            case CommonMark_Element_BlockElement::TYPE_LIST_ITEM:
                return trim($this->inTags('li', array(), $this->renderBlocks($block->getChildren(), $inTightList)));
            case CommonMark_Element_BlockElement::TYPE_LIST:
                $listData = $block->getExtra('list_data');
                $start = isset($listData['start']) ? $listData['start'] : null;
                $tag = $listData['type'] == CommonMark_Element_BlockElement::LIST_TYPE_UNORDERED ? 'ul' : 'ol';
                // A start attribute is emitted only for lists not starting at 1.
                $attr = (!$start || $start == 1) ?
                    array() : array('start' => (string)$start);
                return $this->inTags(
                    $tag,
                    $attr,
                    $this->innerSeparator . $this->renderBlocks(
                        $block->getChildren(),
                        $block->getExtra('tight')
                    ) . $this->innerSeparator
                );
            case CommonMark_Element_BlockElement::TYPE_ATX_HEADER:
            case CommonMark_Element_BlockElement::TYPE_SETEXT_HEADER:
                $tag = 'h' . $block->getExtra('level');
                return $this->inTags($tag, array(), $this->renderInlines($block->getInlineContent()));
            case CommonMark_Element_BlockElement::TYPE_INDENTED_CODE:
                return $this->inTags(
                    'pre',
                    array(),
                    $this->inTags('code', array(), $this->escape($block->getStringContent()))
                );
            case CommonMark_Element_BlockElement::TYPE_FENCED_CODE:
                // The first word of the info string selects the language class.
                $infoWords = preg_split('/ +/', $block->getExtra('info'));
                $attr = count($infoWords) === 0 || strlen(
                    $infoWords[0]
                ) === 0 ? array() : array('class' => 'language-' . $this->escape($infoWords[0], true));
                return $this->inTags(
                    'pre',
                    array(),
                    $this->inTags('code', $attr, $this->escape($block->getStringContent()))
                );
            case CommonMark_Element_BlockElement::TYPE_HTML_BLOCK:
                return $block->getStringContent();
            case CommonMark_Element_BlockElement::TYPE_REFERENCE_DEF:
                return '';
            case CommonMark_Element_BlockElement::TYPE_HORIZONTAL_RULE:
                return $this->inTags('hr', array(), '', true);
            default:
                throw new RuntimeException('Unknown block type: ' . $block->getType());
        }
    }

    /**
     * Renders a sequence of blocks, joined with the block separator.
     *
     * Blocks whose type is 'ReferenceDef' are skipped so that they do not
     * contribute an empty entry (and thus a stray separator).
     *
     * @param CommonMark_Element_BlockElement[] $blocks      Any iterable of blocks
     * @param bool                              $inTightList
     *
     * @return string
     */
    public function renderBlocks($blocks, $inTightList = false)
    {
        $result = array();
        foreach ($blocks as $block) {
            if ($block->getType() !== 'ReferenceDef') {
                $result[] = $this->renderBlock($block, $inTightList);
            }
        }
        return implode($this->blockSeparator, $result);
    }

    /**
     * Public entry point; delegates to renderBlock().
     *
     * @param CommonMark_Element_BlockElement $block
     * @param bool                            $inTightList
     *
     * @return string
     *
     * @api
     */
    public function render(CommonMark_Element_BlockElement $block, $inTightList = false)
    {
        return $this->renderBlock($block, $inTightList);
    }
}

View File

@ -0,0 +1,827 @@
<?php
/*
* This file is part of the commonmark-php package.
*
* (c) Colin O'Dell <colinodell@gmail.com>
*
* Original code based on stmd.js
* - (c) John MacFarlane
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
* Parses inline elements
*/
class CommonMark_InlineParser
{
/**
 * Text currently being parsed; match() and peek() read from it.
 *
 * @var string
 */
protected $subject;
/**
 * Bracket nesting depth remembered by parseLinkLabel() after it reached the
 * end of the subject without finding a closing bracket; lets later calls
 * bail out early.
 *
 * @var int
 */
protected $labelNestLevel = 0; // Used by parseLinkLabel method
/**
 * Current offset into $subject.
 *
 * @var int
 */
protected $pos = 0;
/**
 * Reference map consulted by parseLink() for reference-style links; the
 * constructor initialises it with an empty map.
 *
 * @var CommonMark_Reference_ReferenceMap
 */
protected $refmap;
/**
 * NOTE(review): declared but not assigned in the visible part of this class;
 * the parsers call CommonMark_Util_RegexHelper::getInstance() instead.
 *
 * @var CommonMark_Util_RegexHelper
 */
protected $regexHelper;
/**
 * Constructor
 *
 * Starts the parser off with an empty reference map.
 */
public function __construct()
{
$this->refmap = new CommonMark_Reference_ReferenceMap();
}
/**
 * Tries the regular expression against the unread part of the subject.
 *
 * On success the position is moved just past the end of the match (wherever
 * in the remaining text it was found) and the matched text is returned.
 *
 * @param string $re Regular expression, delimiters included
 *
 * @return string|null The match (if found); null otherwise
 */
protected function match($re)
{
    $remaining = substr($this->subject, $this->pos);
    $found = array();

    if (preg_match($re, $remaining, $found, PREG_OFFSET_CAPTURE) !== 1) {
        return null;
    }

    // With PREG_OFFSET_CAPTURE each entry is a [text, offset] pair.
    list($text, $offset) = $found[0];
    $this->pos += $offset + strlen($text);

    return $text;
}
/**
 * Returns the character at the current subject position, or null if
 * there are no more characters
 *
 * The emptiness test is explicit because the character "0" is falsy in PHP
 * and must not be mistaken for the end of the subject.
 *
 * @return string|null
 */
protected function peek()
{
    $char = substr($this->subject, $this->pos, 1);

    // substr() yields false (older PHP) or '' (PHP 8) past the end.
    if ($char === false || $char === '') {
        return null;
    }

    return $char;
}
/**
 * Parse zero or more space characters, including at most one newline
 *
 * The position is advanced past whatever was matched. The return value is
 * always 1, which lets callers chain this method inside && conditions.
 *
 * @return int Always 1
 */
protected function spnl()
{
$this->match('/^ *(?:\n *)?/');
return 1;
}
// All of the parsers below try to match something at the current position
// in the subject. If they succeed in matching anything, they
// push an inline element onto the 'inlines' list. They return the
// number of characters parsed (possibly 0).
/**
 * Attempt to parse backticks, adding either a backtick code span or a
 * literal sequence of backticks to the 'inlines' list.
 *
 * A code span is closed by a later backtick run equal to the opening one;
 * its content has runs of spaces/newlines collapsed to one space and is
 * trimmed. Without a closer, the opening run is added as literal text and
 * parsing resumes right after it.
 *
 * @param CommonMark_Util_ArrayCollection $inlines
 *
 * @return int Number of characters parsed
 */
protected function parseBackticks(CommonMark_Util_ArrayCollection $inlines)
{
    $begin = $this->pos;
    $opener = $this->match('/^`+/');
    if (!$opener) {
        return 0;
    }

    $contentStart = $this->pos;
    for ($run = $this->match('/`+/m'); $run; $run = $this->match('/`+/m')) {
        if ($run != $opener) {
            continue;
        }
        $length = $this->pos - $contentStart - strlen($opener);
        $code = substr($this->subject, $contentStart, $length);
        $code = preg_replace('/[ \n]+/', ' ', $code);
        $inlines->add(CommonMark_Element_InlineCreator::createCode(trim($code)));

        return $this->pos - $begin;
    }

    // No closing backtick sequence: emit the opener literally and rewind.
    $inlines->add(CommonMark_Element_InlineCreator::createString($opener));
    $this->pos = $contentStart;

    return $this->pos - $begin;
}
/**
 * Parse a backslash escape at the current position.
 *
 * Adds a hard line break when the backslash precedes a newline, the escaped
 * character itself when it is escapable, and a literal backslash otherwise.
 *
 * @param CommonMark_Util_ArrayCollection $inlines
 *
 * @return int Number of characters consumed (0 when not at a backslash)
 */
protected function parseEscaped(CommonMark_Util_ArrayCollection $inlines)
{
    $text = $this->subject;
    $at = $this->pos;

    if ($text[$at] !== '\\') {
        return 0;
    }

    $hasNext = isset($text[$at + 1]);

    if ($hasNext && $text[$at + 1] === "\n") {
        $inlines->add(CommonMark_Element_InlineCreator::createHardbreak());
        $this->pos = $this->pos + 2;

        return 2;
    }

    if ($hasNext && preg_match('/' . CommonMark_Util_RegexHelper::REGEX_ESCAPABLE . '/', $text[$at + 1])) {
        $inlines->add(CommonMark_Element_InlineCreator::createString($text[$at + 1]));
        $this->pos = $this->pos + 2;

        return 2;
    }

    // Lone backslash: keep it as literal text.
    $this->pos++;
    $inlines->add(CommonMark_Element_InlineCreator::createString('\\'));

    return 1;
}
/**
 * Attempt to parse an autolink (URL or email in pointy brackets)
 *
 * An email address becomes a link to "mailto:" plus the address; any other
 * match must start with one of the listed URI schemes and becomes a link to
 * itself. In both cases the text between the brackets is used as the label.
 *
 * @param CommonMark_Util_ArrayCollection $inlines
 *
 * @return int Number of characters consumed; 0 when nothing matched
 */
protected function parseAutolink(CommonMark_Util_ArrayCollection $inlines)
{
$emailRegex = '/^<([a-zA-Z0-9.!#$%&\'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>/';
$otherLinkRegex = '/^<(?:coap|doi|javascript|aaa|aaas|about|acap|cap|cid|crid|data|dav|dict|dns|file|ftp|geo|go|gopher|h323|http|https|iax|icap|im|imap|info|ipp|iris|iris.beep|iris.xpc|iris.xpcs|iris.lwz|ldap|mailto|mid|msrp|msrps|mtqp|mupdate|news|nfs|ni|nih|nntp|opaquelocktoken|pop|pres|rtsp|service|session|shttp|sieve|sip|sips|sms|snmp|soap.beep|soap.beeps|tag|tel|telnet|tftp|thismessage|tn3270|tip|tv|urn|vemmi|ws|wss|xcon|xcon-userid|xmlrpc.beep|xmlrpc.beeps|xmpp|z39.50r|z39.50s|adiumxtra|afp|afs|aim|apt|attachment|aw|beshare|bitcoin|bolo|callto|chrome|chrome-extension|com-eventbrite-attendee|content|cvs|dlna-playsingle|dlna-playcontainer|dtn|dvb|ed2k|facetime|feed|finger|fish|gg|git|gizmoproject|gtalk|hcp|icon|ipn|irc|irc6|ircs|itms|jar|jms|keyparc|lastfm|ldaps|magnet|maps|market|message|mms|ms-help|msnim|mumble|mvn|notes|oid|palm|paparazzi|platform|proxy|psyc|query|res|resource|rmi|rsync|rtmp|secondlife|sftp|sgn|skype|smb|soldat|spotify|ssh|steam|svn|teamspeak|things|udp|unreal|ut2004|ventrilo|view-source|webcal|wtai|wyciwyg|xfire|xri|ymsgr):[^<>\x00-\x20]*>/i';
if ($m = $this->match($emailRegex)) {
// Strip the surrounding < and >.
$email = substr($m, 1, -1);
$inlines->add(CommonMark_Element_InlineCreator::createLink('mailto:' . $email, $email));
return strlen($m);
} elseif ($m = $this->match($otherLinkRegex)) {
// Strip the surrounding < and >.
$dest = substr($m, 1, -1);
$inlines->add(CommonMark_Element_InlineCreator::createLink($dest, $dest));
return strlen($m);
} else {
return 0;
}
}
/**
 * Attempt to parse a raw HTML tag at the current position.
 *
 * @param CommonMark_Util_ArrayCollection $inlines
 *
 * @return int Length of the tag consumed; 0 when no tag starts here
 */
protected function parseHtmlTag(CommonMark_Util_ArrayCollection $inlines)
{
    $tag = $this->match(CommonMark_Util_RegexHelper::getInstance()->getHtmlTagRegex());
    if (!$tag) {
        return 0;
    }

    $inlines->add(CommonMark_Element_InlineCreator::createHtml($tag));

    return strlen($tag);
}
/**
 * Scan a sequence of characters == c, and return information about
 * the number of delimiters and whether they are positioned such that
 * they can open and/or close emphasis or strong emphasis. A utility
 * function for strong/emph parsing.
 *
 * The subject position is left unchanged. The start and the end of the
 * subject are treated as if a newline were there.
 *
 * @param string $char Delimiter character ('*' or '_')
 *
 * @return array With keys 'numDelims' (int), 'canOpen' (bool) and 'canClose' (bool)
 */
protected function scanDelims($char)
{
    $numDelims = 0;
    $startPos = $this->pos;
    $charBefore = $this->pos === 0 ? "\n" : $this->subject[$this->pos - 1];
    while ($this->peek() === $char) {
        $numDelims++;
        $this->pos++;
    }
    // Read the following character directly: a truthiness-based fallback
    // would mistake the character "0" for the end of the subject.
    $charAfter = isset($this->subject[$this->pos]) ? $this->subject[$this->pos] : "\n";
    $canOpen = $numDelims > 0 && $numDelims <= 3 && !preg_match('/\s/', $charAfter);
    $canClose = $numDelims > 0 && $numDelims <= 3 && !preg_match('/\s/', $charBefore);
    if ($char === '_') {
        // Underscores do not open or close emphasis inside a word.
        $canOpen = $canOpen && !preg_match('/[a-z0-9]/i', $charBefore);
        $canClose = $canClose && !preg_match('/[a-z0-9]/i', $charAfter);
    }
    $this->pos = $startPos;
    return compact('numDelims', 'canOpen', 'canClose');
}
/**
 * Attempt to parse emphasis or strong emphasis starting at a run of '*' or '_'.
 *
 * The opening run is first added as a literal string; when matching closing
 * delimiters are found that element is converted in place into an Emph or
 * Strong element holding the inlines parsed in between.
 *
 * @param CommonMark_Util_ArrayCollection $inlines
 *
 * @return int Number of characters consumed. 0 is returned when the current
 *             character is not a delimiter, and also when the run cannot open
 *             emphasis; in the latter case the literal run has already been
 *             added and the position advanced past it.
 */
protected function parseEmphasis(CommonMark_Util_ArrayCollection $inlines)
{
    $startPos = $this->pos;
    $firstClose = 0;
    $nxt = $this->peek();
    if ($nxt == '*' || $nxt == '_') {
        $c = $nxt;
    } else {
        return 0;
    }
    // Get opening delimiters
    $res = $this->scanDelims($c);
    $numDelims = $res['numDelims'];
    $this->pos += $numDelims;
    // We provisionally add a literal string. If we match appropriate
    // closing delimiters, we'll change this to Strong or Emph.
    $inlines->add(CommonMark_Element_InlineCreator::createString(substr($this->subject, $this->pos - $numDelims, $numDelims)));
    // Record the position of this opening delimiter:
    $delimPos = $inlines->count() - 1;
    if (!$res['canOpen'] || $numDelims === 0) {
        return 0;
    }
    $firstCloseDelims = 0;
    switch ($numDelims) {
        case 1: // we started with * or _
            while (true) {
                $res = $this->scanDelims($c);
                if ($res['numDelims'] >= 1 && $res['canClose']) {
                    $this->pos += 1;
                    // Convert the inline at delimpos, currently a string with the delim,
                    // into an Emph whose contents are the succeeding inlines
                    $inlines->get($delimPos)->setType(CommonMark_Element_InlineElement::TYPE_EMPH);
                    $inlines->get($delimPos)->setContents($inlines->slice($delimPos + 1));
                    $inlines->splice($delimPos + 1);
                    break;
                } else {
                    if ($this->parseInline($inlines) === 0) {
                        break;
                    }
                }
            }
            return ($this->pos - $startPos);
        case 2: // We started with ** or __
            while (true) {
                $res = $this->scanDelims($c);
                if ($res['numDelims'] >= 2 && $res['canClose']) {
                    $this->pos += 2;
                    $inlines->get($delimPos)->setType(CommonMark_Element_InlineElement::TYPE_STRONG);
                    $inlines->get($delimPos)->setContents($inlines->slice($delimPos + 1));
                    $inlines->splice($delimPos + 1);
                    break;
                } else {
                    if ($this->parseInline($inlines) === 0) {
                        break;
                    }
                }
            }
            return ($this->pos - $startPos);
        case 3: // We started with *** or ___
            while (true) {
                $res = $this->scanDelims($c);
                if ($res['numDelims'] >= 1 && $res['numDelims'] <= 3 && $res['canClose'] && $res['numDelims'] != $firstCloseDelims) {
                    if ($firstCloseDelims === 1 && $numDelims > 2) {
                        $res['numDelims'] = 2;
                    } elseif ($firstCloseDelims === 2) {
                        $res['numDelims'] = 1;
                    } elseif ($res['numDelims'] === 3) {
                        // If we opened with ***, then we interpret *** as ** followed by *
                        // giving us <strong><em>
                        $res['numDelims'] = 1;
                    }
                    $this->pos += $res['numDelims'];
                    if ($firstClose > 0) { // if we've already passed the first closer:
                        $targetInline = $inlines->get($delimPos);
                        if ($firstCloseDelims === 1) {
                            $targetInline->setType(CommonMark_Element_InlineElement::TYPE_STRONG);
                            $targetInline->setContents(
                                array(
                                    CommonMark_Element_InlineCreator::createEmph(
                                        $inlines->slice($delimPos + 1, $firstClose - $delimPos - 1)
                                    )
                                )
                            );
                        } else {
                            $targetInline->setType(CommonMark_Element_InlineElement::TYPE_EMPH);
                            $targetInline->setContents(
                                array(
                                    CommonMark_Element_InlineCreator::createStrong(
                                        $inlines->slice($delimPos + 1, $firstClose - $delimPos - 1)
                                    )
                                )
                            );
                        }
                        // Append the inlines that follow the first closer. array_merge()
                        // concatenates; the array union operator would silently drop
                        // entries whose numeric keys collide.
                        $targetInline->setContents(
                            array_merge($targetInline->getContents(), $inlines->slice($firstClose + 1))
                        );
                        $inlines->splice($delimPos + 1);
                        break;
                    } else {
                        // this is the first closer; for now, add literal string;
                        // we'll change this when we hit the second closer.
                        // The third substr() argument is a length, not an end offset.
                        $str = substr($this->subject, $this->pos - $res['numDelims'], $res['numDelims']);
                        $inlines->add(CommonMark_Element_InlineCreator::createString($str));
                        $firstClose = $inlines->count() - 1;
                        $firstCloseDelims = $res['numDelims'];
                    }
                } else {
                    // Parse another inline element, til we hit the end
                    if ($this->parseInline($inlines) === 0) {
                        break;
                    }
                }
            }
            return ($this->pos - $startPos);
    }
    return 0;
}
/**
 * Attempt to parse a link title at the current position.
 *
 * The surrounding quote characters are removed and the remainder unescaped.
 *
 * @return null|string The title, or null if no match
 */
protected function parseLinkTitle()
{
    $quoted = $this->match(CommonMark_Util_RegexHelper::getInstance()->getLinkTitleRegex());
    if (!$quoted) {
        return null;
    }

    // Drop the first and the last character (the quotes).
    $inner = substr($quoted, 1, strlen($quoted) - 2);

    return CommonMark_Util_RegexHelper::unescape($inner);
}
/**
 * Attempt to parse a link destination at the current position.
 *
 * The <...> form is tried first (brackets are stripped), then the bare form.
 * The result is unescaped in both cases.
 *
 * @return null|string The destination, or null if no match
 */
protected function parseLinkDestination()
{
    $braced = $this->match(CommonMark_Util_RegexHelper::getInstance()->getLinkDestinationBracesRegex());
    if ($braced) {
        // Chop off the surrounding < and >.
        return CommonMark_Util_RegexHelper::unescape(substr($braced, 1, strlen($braced) - 2));
    }

    $bare = $this->match(CommonMark_Util_RegexHelper::getInstance()->getLinkDestinationRegex());
    if ($bare === null) {
        return null;
    }

    return CommonMark_Util_RegexHelper::unescape($bare);
}
/**
 * Attempt to parse a bracketed link label, nested brackets included.
 *
 * Code spans, autolinks, raw HTML tags and backslash escapes inside the label
 * are skipped over as units so that brackets within them are ignored. On
 * failure the position is restored.
 *
 * @return int Length of the label including both brackets; 0 if there is none
 */
protected function parseLinkLabel()
{
    if ($this->peek() != '[') {
        return 0;
    }
    $startPos = $this->pos;
    $nestLevel = 0;
    if ($this->labelNestLevel > 0) {
        // If we've already checked to the end of this subject
        // for a label, even with a different starting [, we
        // know we won't find one here and we can just return.
        // This avoids lots of backtracking.
        // Note: nest level 1 would be: [foo [bar]
        // nest level 2 would be: [foo [bar [baz]
        $this->labelNestLevel--;
        return 0;
    }
    $this->pos++; // Advance past [
    while (true) {
        // Read the character directly and compare against null: a plain
        // truthiness test would stop the scan at the character "0".
        $c = isset($this->subject[$this->pos]) ? $this->subject[$this->pos] : null;
        if ($c === null || ($c === ']' && $nestLevel <= 0)) {
            break;
        }
        switch ($c) {
            case '`':
                $this->parseBackticks(new CommonMark_Util_ArrayCollection());
                break;
            case '<':
                // Try each parser in turn; the first one consuming input wins.
                if (!$this->parseAutolink(new CommonMark_Util_ArrayCollection())
                    && !$this->parseHtmlTag(new CommonMark_Util_ArrayCollection())
                ) {
                    $this->parseString(new CommonMark_Util_ArrayCollection());
                }
                break;
            case '[': // nested []
                $nestLevel++;
                $this->pos++;
                break;
            case ']': //nested []
                $nestLevel--;
                $this->pos++;
                break;
            case '\\':
                $this->parseEscaped(new CommonMark_Util_ArrayCollection());
                break;
            default:
                $this->parseString(new CommonMark_Util_ArrayCollection());
        }
    }
    if ($c === ']') {
        $this->labelNestLevel = 0;
        $this->pos++; // advance past ]
        return $this->pos - $startPos;
    }
    if ($c === null) {
        // End of subject reached: remember the depth for later calls.
        $this->labelNestLevel = $nestLevel;
    }
    $this->pos = $startPos;
    return 0;
}
/**
 * Parse a raw link label (surrounding brackets included) into inline elements.
 *
 * A fresh parser with an empty reference map is used so that links do not
 * resolve inside nested brackets.
 *
 * @param string $s Raw label, e.g. "[text]"
 *
 * @return CommonMark_Util_ArrayCollection|CommonMark_Element_InlineElementInterface[] Inline contents
 */
private function parseRawLabel($s)
{
    // Strip the first and the last character (the brackets).
    $inner = substr($s, 1, strlen($s) - 2);
    $nestedParser = new self();

    return $nestedParser->parse($inner, new CommonMark_Reference_ReferenceMap());
}
/**
 * Attempt to parse a link. If successful, add the link to inlines.
 *
 * Two forms are recognised: an explicit link, [label](destination "title"),
 * and a reference link, [label], [label][] or [label][ref], which is looked
 * up in the reference map. On failure the position is restored.
 *
 * @param CommonMark_Util_ArrayCollection $inlines
 *
 * @return int Number of characters consumed; 0 when no link was parsed
 */
protected function parseLink(CommonMark_Util_ArrayCollection $inlines)
{
    $startPos = $this->pos;
    $n = $this->parseLinkLabel();
    if ($n === 0) {
        return 0;
    }
    $rawLabel = substr($this->subject, $startPos, $n);
    // if we got this far, we've parsed a label.
    // Try to parse an explicit link: [label](url "title")
    if ($this->peek() == '(') {
        $this->pos++;
        if ($this->spnl() &&
            (($dest = $this->parseLinkDestination()) !== null) &&
            $this->spnl()
        ) {
            // make sure there's a space before the title:
            if (preg_match('/^\\s/', $this->subject[$this->pos - 1])) {
                // Only a missing title becomes ''; a title of "0" is kept.
                $title = $this->parseLinkTitle();
                if ($title === null) {
                    $title = '';
                }
            } else {
                $title = null;
            }
            if ($this->spnl() && $this->match('/^\\)/')) {
                $inlines->add(CommonMark_Element_InlineCreator::createLink($dest, $this->parseRawLabel($rawLabel), $title));
                return $this->pos - $startPos;
            }
        }
        $this->pos = $startPos;
        return 0;
    }
    // If we're here, it wasn't an explicit link. Try to parse a reference link.
    // first, see if there's another label
    $savePos = $this->pos;
    $this->spnl();
    $beforeLabel = $this->pos;
    $n = $this->parseLinkLabel();
    if ($n == 2) {
        // empty second label
        $refLabel = $rawLabel;
    } elseif ($n > 0) {
        $refLabel = substr($this->subject, $beforeLabel, $n);
    } else {
        $this->pos = $savePos;
        $refLabel = $rawLabel;
    }
    // Look the reference label up in the refmap
    if ($link = $this->refmap->getReference($refLabel)) {
        $inlines->add(
            CommonMark_Element_InlineCreator::createLink($link->getDestination(), $this->parseRawLabel($rawLabel), $link->getTitle())
        );
        return $this->pos - $startPos;
    }
    // Nothing worked, rewind:
    $this->pos = $startPos;
    return 0;
}
/**
 * Try to read an entity reference (named, decimal "&#...;" or
 * hexadecimal "&#x...;") at the current position and, on success,
 * append it to $inlines as an entity element.
 *
 * @param CommonMark_Util_ArrayCollection $inlines Collection the entity is added to
 *
 * @return int Length of the matched entity text, or 0 if there was no match
 */
protected function parseEntity(CommonMark_Util_ArrayCollection $inlines)
{
    $entity = $this->match('/^&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});/i');
    if (!$entity) {
        return 0;
    }

    $element = CommonMark_Element_InlineCreator::createEntity($entity);
    $inlines->add($element);

    return strlen($entity);
}
/**
 * Parse a run of ordinary characters, or a single character with
 * a special meaning in markdown, as a plain string, adding to inlines.
 *
 * @param CommonMark_Util_ArrayCollection $inlines Collection the string is added to
 *
 * @return int Length of the text consumed, or 0 if nothing matched
 */
protected function parseString(CommonMark_Util_ArrayCollection $inlines)
{
    $m = $this->match(CommonMark_Util_RegexHelper::getInstance()->getMainRegex());

    // Do not test $m for truthiness: the matched text can be the string
    // "0", which PHP treats as false. That would make this method report
    // "nothing parsed" and silently drop the text. Only a missing or empty
    // match counts as failure.
    if (!is_string($m) || $m === '') {
        return 0;
    }

    $inlines->add(CommonMark_Element_InlineCreator::createString($m));
    return strlen($m);
}
/**
 * Parse a newline. If the preceding string element ends with two or more
 * spaces, add a hard line break; otherwise add a soft line break.
 * Trailing spaces are stripped from that preceding string element.
 *
 * @param CommonMark_Util_ArrayCollection $inlines Collection the break is added to
 *
 * @return int 1 if a newline was consumed, 0 otherwise
 */
protected function parseNewline(CommonMark_Util_ArrayCollection $inlines)
{
    if ($this->peek() !== "\n") {
        return 0;
    }
    $this->pos++;

    // Count the spaces at the end of the previous element, if it is a string.
    // Counting (rather than comparing the last two characters with a literal)
    // keeps the "two spaces" rule unambiguous.
    $last = $inlines->last();
    $trailingSpaces = 0;
    $trimmed = null;
    if ($last && $last->getType() == CommonMark_Element_InlineElement::TYPE_STRING) {
        $contents = $last->getContents();
        $trimmed = rtrim($contents, ' ');
        $trailingSpaces = strlen($contents) - strlen($trimmed);
    }

    if ($trailingSpaces >= 2) {
        // Two or more trailing spaces: hard break, all trailing spaces removed.
        $last->setContents($trimmed);
        $inlines->add(CommonMark_Element_InlineCreator::createHardbreak());
    } else {
        if ($trailingSpaces === 1) {
            // A single trailing space is dropped before a soft break.
            $last->setContents($trimmed);
        }
        $inlines->add(CommonMark_Element_InlineCreator::createSoftbreak());
    }

    return 1;
}
/**
 * Attempt to parse an image: a "!" immediately followed by a link.
 *
 * When the "!" is not followed by a link, the "!" itself is added to
 * $inlines as a plain string. When a link follows, the link element
 * just added by parseLink() is converted into an image element.
 *
 * @param CommonMark_Util_ArrayCollection $inlines Collection the result is added to
 *
 * @return int Number of characters consumed, or 0 if there is no "!" here
 *
 * @throws RuntimeException If parseLink() reported success but the last
 *                          inline element is not a link
 */
protected function parseImage(CommonMark_Util_ArrayCollection $inlines)
{
    if (!$this->match('/^!/')) {
        return 0;
    }

    $linkLength = $this->parseLink($inlines);
    if ($linkLength === 0) {
        // Lone "!": keep it as literal text.
        $inlines->add(CommonMark_Element_InlineCreator::createString('!'));
        return 1;
    }

    $candidate = $inlines->last();
    $isLink = $candidate && $candidate->getType() == CommonMark_Element_InlineElement::TYPE_LINK;
    if (!$isLink) {
        // This shouldn't happen
        throw new RuntimeException('Unknown error occurred while attempting to parse an image');
    }

    $candidate->setType(CommonMark_Element_InlineElement::TYPE_IMAGE);

    // +1 accounts for the leading "!".
    return $linkLength + 1;
}
/**
 * Parse the next inline element in the subject, advancing the subject
 * position and adding the result to $inlines.
 *
 * The character at the current position selects a specialised parser.
 * If that parser consumes nothing (or the character has no specialised
 * parser), the input is handed to parseString().
 *
 * @param CommonMark_Util_ArrayCollection $inlines Collection the element is added to
 *
 * @return int Result of the parser that handled the input; 0 when nothing was parsed
 */
protected function parseInline(CommonMark_Util_ArrayCollection $inlines)
{
    $consumed = null;

    switch ($this->peek()) {
        case "\n":
            $consumed = $this->parseNewline($inlines);
            break;
        case '\\':
            $consumed = $this->parseEscaped($inlines);
            break;
        case '`':
            $consumed = $this->parseBackticks($inlines);
            break;
        case '*':
        case '_':
            $consumed = $this->parseEmphasis($inlines);
            break;
        case '[':
            $consumed = $this->parseLink($inlines);
            break;
        case '!':
            $consumed = $this->parseImage($inlines);
            break;
        case '<':
            // An autolink takes precedence over a raw HTML tag.
            $consumed = $this->parseAutolink($inlines);
            if (!$consumed) {
                $consumed = $this->parseHtmlTag($inlines);
            }
            break;
        case '&':
            $consumed = $this->parseEntity($inlines);
            break;
    }

    if ($consumed) {
        return $consumed;
    }

    // Fallback: treat the input as plain text.
    return $this->parseString($inlines);
}
/**
 * Parse $s as a list of inlines, using $refMap to resolve references.
 *
 * Resets the parser state (subject, position and reference map) and then
 * calls parseInline() repeatedly until it reports that nothing was parsed.
 *
 * @param string                            $s      Text to parse
 * @param CommonMark_Reference_ReferenceMap $refMap References available to links
 *
 * @return CommonMark_Util_ArrayCollection The parsed inline elements
 */
protected function parseInlines($s, CommonMark_Reference_ReferenceMap $refMap)
{
    $this->subject = $s;
    $this->pos = 0;
    $this->refmap = $refMap;

    $collected = new CommonMark_Util_ArrayCollection();
    do {
        $consumed = $this->parseInline($collected);
    } while ($consumed);

    return $collected;
}
/**
 * Public entry point: parse $s into inline elements.
 *
 * Delegates to parseInlines().
 *
 * @param string                            $s      Text to parse
 * @param CommonMark_Reference_ReferenceMap $refMap References available to links
 *
 * @return CommonMark_Util_ArrayCollection The parsed inline elements
 */
public function parse($s, CommonMark_Reference_ReferenceMap $refMap)
{
    $inlines = $this->parseInlines($s, $refMap);

    return $inlines;
}
/**
 * Attempt to parse a link reference definition at the start of $s,
 * registering it in $refMap.
 *
 * The expected shape is: a link label, a colon, a non-empty destination,
 * an optional title, then only spaces up to the end of the line/input.
 * A label already present in $refMap is not replaced.
 *
 * @param string                            $s      Text that may start with a reference definition
 * @param CommonMark_Reference_ReferenceMap $refMap Map that receives the reference
 *
 * @return int Number of characters consumed, or 0 if no definition was parsed
 */
public function parseReference($s, CommonMark_Reference_ReferenceMap $refMap)
{
    $this->subject = $s;
    $this->pos = 0;
    $origin = $this->pos;

    // Label
    $labelLength = $this->parseLinkLabel();
    if ($labelLength === 0) {
        return 0;
    }
    $label = substr($this->subject, 0, $labelLength);

    // Colon
    if ($this->peek() !== ':') {
        $this->pos = $origin;
        return 0;
    }
    $this->pos++;

    // Link URL (must be present and non-empty)
    $this->spnl();
    $destination = $this->parseLinkDestination();
    if ($destination === null || strlen($destination) === 0) {
        $this->pos = $origin;
        return 0;
    }

    // Optional title
    $beforeTitle = $this->pos;
    $this->spnl();
    $title = $this->parseLinkTitle();
    if ($title === null) {
        // No title: rewind to before the whitespace that was skipped.
        $this->pos = $beforeTitle;
        $title = '';
    }

    // Make sure we're at line end
    $lineEnd = $this->match('/^ *(?:\n|$)/');
    if ($lineEnd === null) {
        $this->pos = $origin;
        return 0;
    }

    // The first definition of a label wins.
    $alreadyDefined = $refMap->contains($label);
    if (!$alreadyDefined) {
        $refMap->addReference(new CommonMark_Reference_Reference($label, $destination, $title));
    }

    return $this->pos - $origin;
}
}

View File

@ -0,0 +1,92 @@
<?php
/*
* This file is part of the commonmark-php package.
*
* (c) Colin O'Dell <colinodell@gmail.com>
*
* Original code based on stmd.js
* - (c) John MacFarlane
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
 * Link reference
 *
 * Holds the label, destination and title of one link reference.
 * The label is stored in normalized form (see normalizeReference()).
 */
class CommonMark_Reference_Reference
{
    /**
     * Normalized label
     *
     * @var string
     */
    protected $label;

    /**
     * @var string
     */
    protected $destination;

    /**
     * @var string
     */
    protected $title;

    /**
     * Constructor
     *
     * @param string $label       Label as written; it is normalized before being stored
     * @param string $destination Link destination
     * @param string $title       Link title
     */
    public function __construct($label, $destination, $title)
    {
        $this->label = self::normalizeReference($label);
        $this->destination = $destination;
        $this->title = $title;
    }

    /**
     * @return string The normalized label
     */
    public function getLabel()
    {
        return $this->label;
    }

    /**
     * @return string
     */
    public function getDestination()
    {
        return $this->destination;
    }

    /**
     * @return string
     */
    public function getTitle()
    {
        return $this->title;
    }

    /**
     * Normalize reference label
     *
     * This enables case-insensitive, whitespace-insensitive label matching:
     * leading/trailing whitespace is removed, every internal run of
     * whitespace becomes a single space, and the result is upper-cased.
     *
     * @param string $string Label to normalize
     *
     * @return string Normalized label
     */
    public static function normalizeReference($string)
    {
        // Collapse internal whitespace to a SINGLE SPACE (not to nothing:
        // deleting the whitespace would make the distinct labels "foo bar"
        // and "foobar" collide) and remove leading/trailing whitespace.
        $string = preg_replace('/\s+/', ' ', trim($string));

        return mb_strtoupper($string, 'UTF-8');
    }
}

View File

@ -0,0 +1,66 @@
<?php
/*
* This file is part of the commonmark-php package.
*
* (c) Colin O'Dell <colinodell@gmail.com>
*
* Original code based on stmd.js
* - (c) John MacFarlane
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
 * A collection of references, indexed by their normalized label.
 */
class CommonMark_Reference_ReferenceMap
{
    /**
     * References keyed by normalized label
     *
     * @var CommonMark_Reference_Reference[]
     */
    protected $references = array();

    /**
     * Store a reference under its normalized label.
     *
     * A reference already stored under the same key is overwritten.
     *
     * @param CommonMark_Reference_Reference $reference
     *
     * @return $this
     */
    public function addReference(CommonMark_Reference_Reference $reference)
    {
        $storedLabel = $reference->getLabel();
        $this->references[CommonMark_Reference_Reference::normalizeReference($storedLabel)] = $reference;

        return $this;
    }

    /**
     * Tell whether a reference exists for the given label.
     *
     * @param string $label Label in any case/whitespace form
     *
     * @return bool
     */
    public function contains($label)
    {
        $key = CommonMark_Reference_Reference::normalizeReference($label);

        return isset($this->references[$key]);
    }

    /**
     * Fetch the reference stored for the given label.
     *
     * @param string $label Label in any case/whitespace form
     *
     * @return CommonMark_Reference_Reference|null The reference, or null if none is stored
     */
    public function getReference($label)
    {
        $key = CommonMark_Reference_Reference::normalizeReference($label);

        return isset($this->references[$key]) ? $this->references[$key] : null;
    }
}

View File

@ -0,0 +1,225 @@
<?php
/*
* This file is part of the commonmark-php package.
*
* (c) Colin O'Dell <colinodell@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
 * Array collection
 *
 * A thin object wrapper around a plain PHP array that can be iterated,
 * counted and accessed with array syntax.
 */
class CommonMark_Util_ArrayCollection implements IteratorAggregate, Countable, ArrayAccess
{
    /**
     * The wrapped array
     *
     * @var array
     */
    private $elements;

    /**
     * Constructor
     *
     * @param array $elements Initial contents
     */
    public function __construct(array $elements = array())
    {
        $this->elements = $elements;
    }

    /**
     * Rewind the wrapped array's internal pointer and return its first element.
     *
     * @return mixed The first element, or false when the collection is empty
     */
    public function first()
    {
        return reset($this->elements);
    }

    /**
     * Move the wrapped array's internal pointer to the end and return the last element.
     *
     * @return mixed The last element, or false when the collection is empty
     */
    public function last()
    {
        return end($this->elements);
    }

    /**
     * Retrieve an external iterator over the elements
     *
     * @return Traversable
     */
    public function getIterator()
    {
        return new ArrayIterator($this->elements);
    }

    /**
     * Append an element to the end of the collection.
     *
     * @param mixed $element
     *
     * @return bool Always true
     */
    public function add($element)
    {
        $this->elements[] = $element;

        return true;
    }

    /**
     * Store a value under the given key, replacing any existing value.
     *
     * @param mixed $key
     * @param mixed $value
     */
    public function set($key, $value)
    {
        $this->elements[$key] = $value;
    }

    /**
     * Fetch the value stored under the given key.
     *
     * @param mixed $key
     *
     * @return mixed|null The value, or null when the key is unset (or holds null)
     */
    public function get($key)
    {
        if (isset($this->elements[$key])) {
            return $this->elements[$key];
        }

        return null;
    }

    /**
     * Remove the entry stored under the given key.
     *
     * @param mixed $key
     *
     * @return mixed|null The removed value, or null when the key did not exist
     */
    public function remove($key)
    {
        $exists = isset($this->elements[$key]) || array_key_exists($key, $this->elements);
        if (!$exists) {
            return null;
        }

        $value = $this->elements[$key];
        unset($this->elements[$key]);

        return $value;
    }

    /**
     * @return bool True when the collection holds no elements
     */
    public function isEmpty()
    {
        return !$this->elements;
    }

    /**
     * Tell whether the key exists, even if the value stored under it is null.
     *
     * @param mixed $key
     *
     * @return bool
     */
    public function containsKey($key)
    {
        if (isset($this->elements[$key])) {
            return true;
        }

        return array_key_exists($key, $this->elements);
    }

    /**
     * Count elements of the collection
     *
     * @return int The count as an integer.
     */
    public function count()
    {
        return count($this->elements);
    }

    /**
     * Whether an offset exists (ArrayAccess)
     *
     * @param mixed $offset An offset to check for.
     *
     * @return boolean Result of containsKey() for the offset
     */
    public function offsetExists($offset)
    {
        return $this->containsKey($offset);
    }

    /**
     * Offset to retrieve (ArrayAccess)
     *
     * @param mixed $offset The offset to retrieve.
     *
     * @return mixed|null Result of get() for the offset
     */
    public function offsetGet($offset)
    {
        return $this->get($offset);
    }

    /**
     * Offset to set (ArrayAccess)
     *
     * A null offset (as in "$collection[] = $value") appends the value.
     *
     * @param mixed $offset The offset to assign the value to.
     * @param mixed $value  The value to set.
     *
     * @return void
     */
    public function offsetSet($offset, $value)
    {
        if ($offset === null) {
            $this->add($value);

            return;
        }

        $this->set($offset, $value);
    }

    /**
     * Offset to unset (ArrayAccess)
     *
     * @param mixed $offset The offset to unset.
     *
     * @return void
     */
    public function offsetUnset($offset)
    {
        $this->remove($offset);
    }

    /**
     * Returns a subset of the array, with the original keys preserved.
     * The collection itself is not modified.
     *
     * @param int      $offset
     * @param int|null $length
     *
     * @return array
     */
    public function slice($offset, $length = null)
    {
        return array_slice($this->elements, $offset, $length, true);
    }

    /**
     * Remove a subset of the array, optionally inserting a replacement.
     *
     * The removed part will be returned. A null $length is replaced by the
     * current element count before array_splice() is called.
     *
     * @param int      $offset
     * @param int|null $length
     * @param array    $replacement
     *
     * @return array The removed subset
     */
    public function splice($offset, $length = null, $replacement = array())
    {
        $effectiveLength = ($length === null) ? count($this->elements) : $length;

        return array_splice($this->elements, $offset, $effectiveLength, $replacement);
    }

    /**
     * @return array A copy of the wrapped array
     */
    public function toArray()
    {
        return $this->elements;
    }
}

View File

@ -0,0 +1,252 @@
<?php
/*
* This file is part of the commonmark-php package.
*
* (c) Colin O'Dell <colinodell@gmail.com>
*
* Original code based on stmd.js
* - (c) John MacFarlane
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
/**
 * Provides regular expressions and utilities for parsing Markdown
 *
 * Singletons are generally bad, but it allows us to build the regexes once (and only once).
 */
class CommonMark_Util_RegexHelper
{
    // Keys of the partial patterns held in $regex (see getPartialRegex()).
    const ESCAPABLE = 0;
    const ESCAPED_CHAR = 1;
    const IN_DOUBLE_QUOTES = 2;
    const IN_SINGLE_QUOTES = 3;
    const IN_PARENS = 4;
    const REG_CHAR = 5;
    const IN_PARENS_NOSP = 6;
    const TAGNAME = 7;
    const BLOCKTAGNAME = 8;
    const ATTRIBUTENAME = 9;
    const UNQUOTEDVALUE = 10;
    const SINGLEQUOTEDVALUE = 11;
    const DOUBLEQUOTEDVALUE = 12;
    const ATTRIBUTEVALUE = 13;
    const ATTRIBUTEVALUESPEC = 14;
    const ATTRIBUTE = 15;
    const OPENTAG = 16;
    const CLOSETAG = 17;
    const OPENBLOCKTAG = 18;
    const CLOSEBLOCKTAG = 19;
    const HTMLCOMMENT = 20;
    const PROCESSINGINSTRUCTION = 21;
    const DECLARATION = 22;
    const CDATA = 23;
    const HTMLTAG = 24;
    const HTMLBLOCKOPEN = 25;
    const LINK_TITLE = 26;

    // Character class of the characters that may be backslash-escaped.
    const REGEX_ESCAPABLE = '[!"#$%&\'()*+,.\/:;<=>?@[\\\\\]^_`{|}~-]';

    /**
     * Partial patterns (without delimiters), keyed by the constants above
     *
     * @var string[]
     */
    protected $regex = array();

    /**
     * The shared instance, created on first use
     *
     * @var CommonMark_Util_RegexHelper|null
     */
    static protected $instance;

    /**
     * Constructor
     *
     * Protected: instances are obtained through getInstance().
     */
    protected function __construct()
    {
        $this->buildRegexPatterns();
    }

    /**
     * Return the shared instance, creating it on the first call.
     *
     * @return CommonMark_Util_RegexHelper
     */
    public static function getInstance()
    {
        if (!isset(self::$instance)) {
            self::$instance = new self();
        }

        return self::$instance;
    }

    /**
     * Builds the regular expressions required to parse Markdown
     *
     * We could hard-code them all as pre-built constants, but that would be more difficult to manage.
     * Each fragment is first built as a named local, then all fragments are
     * stored in $this->regex under their constant.
     */
    protected function buildRegexPatterns()
    {
        // Escapes and quoted/parenthesised runs
        $escapable = self::REGEX_ESCAPABLE;
        $escapedChar = '\\\\' . $escapable;
        $inDoubleQuotes = '"(' . $escapedChar . '|[^"\x00])*"';
        $inSingleQuotes = '\'(' . $escapedChar . '|[^\'\x00])*\'';
        $inParens = '\\((' . $escapedChar . '|[^)\x00])*\\)';
        $regChar = '[^\\\\()\x00-\x20]';
        $inParensNoSp = '\((' . $regChar . '|' . $escapedChar . ')*\)';

        // HTML tag building blocks
        $tagName = '[A-Za-z][A-Za-z0-9]*';
        $blockTagName = '(?:article|header|aside|hgroup|iframe|blockquote|hr|body|li|map|button|object|canvas|ol|caption|output|col|p|colgroup|pre|dd|progress|div|section|dl|table|td|dt|tbody|embed|textarea|fieldset|tfoot|figcaption|th|figure|thead|footer|footer|tr|form|ul|h1|h2|h3|h4|h5|h6|video|script|style)';
        $attributeName = '[a-zA-Z_:][a-zA-Z0-9:._-]*';
        $unquotedValue = '[^"\'=<>`\x00-\x20]+';
        $singleQuotedValue = '\'[^\']*\'';
        $doubleQuotedValue = '"[^"]*"';
        $attributeValue = '(?:' . $unquotedValue . '|' . $singleQuotedValue . '|' . $doubleQuotedValue . ')';
        $attributeValueSpec = '(?:\s*=\s*' . $attributeValue . ')';
        $attribute = '(?:\s+' . $attributeName . $attributeValueSpec . '?)';
        $openTag = '<' . $tagName . $attribute . '*\s*\/?>';
        $closeTag = '<\/' . $tagName . '\s*[>]';
        $openBlockTag = '<' . $blockTagName . $attribute . '*\s*\/?>';
        $closeBlockTag = '<\/' . $blockTagName . '\s*[>]';
        $htmlComment = '<!--([^-]+|[-][^-]+)*-->';
        $processingInstruction = '[<][?].*?[?][>]';
        $declaration = '<![A-Z]+\s+[^>]*>';
        $cdata = '<!\[CDATA\[([^\]]+|\][^\]]|\]\][^>])*\]\]>';
        $htmlTag = '(?:' . implode(
            '|',
            array($openTag, $closeTag, $htmlComment, $processingInstruction, $declaration, $cdata)
        ) . ')';
        $htmlBlockOpen = '<(?:' . $blockTagName . '[\s\/>]|\/' . $blockTagName . '[\s>]|[?!])';

        // A link title is one of the three quoted/parenthesised forms above,
        // anchored at the start of the input.
        $linkTitle = '^(?:' . $inDoubleQuotes . '|' . $inSingleQuotes . '|' . $inParens . ')';

        $this->regex = array(
            self::ESCAPABLE => $escapable,
            self::ESCAPED_CHAR => $escapedChar,
            self::IN_DOUBLE_QUOTES => $inDoubleQuotes,
            self::IN_SINGLE_QUOTES => $inSingleQuotes,
            self::IN_PARENS => $inParens,
            self::REG_CHAR => $regChar,
            self::IN_PARENS_NOSP => $inParensNoSp,
            self::TAGNAME => $tagName,
            self::BLOCKTAGNAME => $blockTagName,
            self::ATTRIBUTENAME => $attributeName,
            self::UNQUOTEDVALUE => $unquotedValue,
            self::SINGLEQUOTEDVALUE => $singleQuotedValue,
            self::DOUBLEQUOTEDVALUE => $doubleQuotedValue,
            self::ATTRIBUTEVALUE => $attributeValue,
            self::ATTRIBUTEVALUESPEC => $attributeValueSpec,
            self::ATTRIBUTE => $attribute,
            self::OPENTAG => $openTag,
            self::CLOSETAG => $closeTag,
            self::OPENBLOCKTAG => $openBlockTag,
            self::CLOSEBLOCKTAG => $closeBlockTag,
            self::HTMLCOMMENT => $htmlComment,
            self::PROCESSINGINSTRUCTION => $processingInstruction,
            self::DECLARATION => $declaration,
            self::CDATA => $cdata,
            self::HTMLTAG => $htmlTag,
            self::HTMLBLOCKOPEN => $htmlBlockOpen,
            self::LINK_TITLE => $linkTitle,
        );
    }

    /**
     * Returns a partial regex
     *
     * It'll need to be wrapped with /.../ before use
     *
     * @param int $const One of the class constants ESCAPABLE .. LINK_TITLE
     *
     * @return string
     */
    public function getPartialRegex($const)
    {
        return $this->regex[$const];
    }

    /**
     * @return string Case-insensitive pattern matching an HTML tag at the start of the input
     */
    public function getHtmlTagRegex()
    {
        $body = $this->regex[self::HTMLTAG];

        return '/^' . $body . '/i';
    }

    /**
     * @return string Case-insensitive pattern matching the opening of an HTML block at the start of the input
     */
    public function getHtmlBlockOpenRegex()
    {
        $body = $this->regex[self::HTMLBLOCKOPEN];

        return '/^' . $body . '/i';
    }

    /**
     * @return string Pattern matching a link title (the partial pattern is already anchored with ^)
     */
    public function getLinkTitleRegex()
    {
        $body = $this->regex[self::LINK_TITLE];

        return '/' . $body . '/';
    }

    /**
     * @return string Pattern matching a link destination that is not wrapped in <...>
     */
    public function getLinkDestinationRegex()
    {
        $alternatives = $this->regex[self::REG_CHAR] . '+|'
            . $this->regex[self::ESCAPED_CHAR] . '|'
            . $this->regex[self::IN_PARENS_NOSP];

        return '/^(?:' . $alternatives . ')*/';
    }

    /**
     * @return string Pattern matching a link destination wrapped in <...>
     */
    public function getLinkDestinationBracesRegex()
    {
        $escapedChar = $this->regex[self::ESCAPED_CHAR];

        return '/^(?:[<](?:[^<>\\n\\\\\\x00]|' . $escapedChar . '|\\\\)*[>])/';
    }

    /**
     * @return string Pattern matching a horizontal rule line
     */
    public function getHRuleRegex()
    {
        return '/^(?:(?:\* *){3,}|(?:_ *){3,}|(?:- *){3,}) *$/';
    }

    /**
     * Matches a character with a special meaning in markdown,
     * or a string of non-special characters.
     *
     * @return string
     */
    public function getMainRegex()
    {
        return '/^(?:[\n`\[\]\\\\!<&*_]|[^\n`\[\]\\\\!<&*_]+)/m';
    }

    /**
     * Attempt to match a regex in $string, searching from $offset onwards
     *
     * @param string $regex
     * @param string $string
     * @param int    $offset
     *
     * @return int|null Index of the match within $string, or null if there is none
     */
    public static function matchAt($regex, $string, $offset)
    {
        $tail = substr($string, $offset);
        $found = array();
        if (preg_match($regex, $tail, $found, PREG_OFFSET_CAPTURE)) {
            // The captured offset is relative to $tail; shift it back.
            return $offset + $found[0][1];
        }

        return null;
    }

    /**
     * Functional wrapper around preg_match_all
     *
     * With exactly one full match, every group is flattened to its single
     * value; with several matches the PREG_PATTERN_ORDER layout is returned.
     *
     * @param string $pattern
     * @param string $subject
     * @param int    $offset Position in $subject at which the search starts
     *
     * @return array|null The matches, or null if the pattern did not match
     */
    public static function matchAll($pattern, $subject, $offset = 0)
    {
        $groups = array();
        $tail = substr($subject, $offset);
        preg_match_all($pattern, $tail, $groups, PREG_PATTERN_ORDER);

        $wholeMatches = reset($groups);
        if (empty($wholeMatches)) {
            return null;
        }

        if (count($wholeMatches) == 1) {
            foreach ($groups as $index => $group) {
                $groups[$index] = reset($group);
            }
        }

        return $groups;
    }

    /**
     * Replace backslash escapes with literal characters
     *
     * Only backslashes followed by a character from REGEX_ESCAPABLE are removed.
     *
     * @param string $string
     *
     * @return string
     */
    public static function unescape($string)
    {
        $escapeSequence = '/\\\\(' . self::REGEX_ESCAPABLE . ')/';
        $unescaped = preg_replace($escapeSequence, '$1', $string);

        return $unescaped;
    }
}

File diff suppressed because it is too large Load Diff

View File

@ -478,11 +478,10 @@ class Widget_Abstract_Comments extends Widget_Abstract
static $parser;
if (empty($parser)) {
$parser = new ParsedownExtra();
$parser->setBreaksEnabled(true);
$parser = new CommonMark_CommonMarkConverter();
}
$html = $parser->text($text);
$html = $parser->convertToHtml($text);
}
return $html;

View File

@ -962,11 +962,10 @@ class Widget_Abstract_Contents extends Widget_Abstract
static $parser;
if (empty($parser)) {
$parser = new ParsedownExtra();
$parser->setBreaksEnabled(true);
$parser = new CommonMark_CommonMarkConverter();
}
$html = $parser->text($text);
$html = $parser->convertToHtml($text);
}
return $html;