MDL-29585 mod_wiki: Handle special characters in headings/sections

This commit is contained in:
Frederic Massart 2014-11-11 16:13:56 +08:00
parent c106341098
commit c159e2aa0a
4 changed files with 158 additions and 7 deletions

View File

@ -91,6 +91,12 @@ class creole_parser extends wiki_markup_parser {
parent::before_parsing();
}
public function get_section($header, $text, $clean = false) {
    // self::before_parsing() has likely run htmlspecialchars over the content, so the
    // header we are asked to find may arrive entity-encoded. Decode it first so that
    // the lookup performed by the parent implementation can match it.
    $decodedheader = htmlspecialchars_decode($header);

    return parent::get_section($decodedheader, $text, $clean);
}
protected function header_block_rule($match) {
$num = strlen($match[1]);

View File

@ -17,10 +17,14 @@ class html_parser extends nwiki_parser {
public function __construct() {
parent::__construct();
$this->tagrules = array('link' => $this->tagrules['link'], 'url' => $this->tagrules['url']);
// Headers are considered tags here.
$this->tagrules['header'] = array('expression' => "/<\s*h([1-$this->maxheaderdepth])\s*>(.+?)<\/h[1-$this->maxheaderdepth]>/is"
// The order is important, headers should be parsed before links.
$this->tagrules = array(
// Headers are considered tags here.
'header' => array(
'expression' => "/<\s*h([1-$this->maxheaderdepth])\s*>(.+?)<\/h[1-$this->maxheaderdepth]>/is"
),
'link' => $this->tagrules['link'],
'url' => $this->tagrules['url']
);
}
@ -52,7 +56,8 @@ class html_parser extends nwiki_parser {
$h1 = array("<\s*h1\s*>", "<\/h1>");
preg_match("/(.*?)({$h1[0]}\s*\Q$header\E\s*{$h1[1]}.*?)((?:\n{$h1[0]}.*)|$)/is", $text, $match);
$regex = "/(.*?)({$h1[0]}\s*".preg_quote($header, '/')."\s*{$h1[1]}.*?)((?:\n{$h1[0]}.*)|$)/is";
preg_match($regex, $text, $match);
if (!empty($match)) {
return array($match[1], $match[2], $match[3]);

View File

@ -401,7 +401,8 @@ abstract class wiki_markup_parser extends generic_parser {
$text .= "\n\n";
}
preg_match("/(.*?)(=\ *\Q$header\E\ *=*\n.*?)((?:\n=[^=]+.*)|$)/is", $text, $match);
$regex = "/(.*?)(=\ *".preg_quote($header, '/')."\ *=*\n.*?)((?:\n=[^=]+.*)|$)/is";
preg_match($regex, $text, $match);
if (!empty($match)) {
return array($match[1], $match[2], $match[3]);

View File

@ -64,7 +64,7 @@ class mod_wiki_wikiparser_test extends basic_testcase {
$result['parsed_text'] = preg_replace('~[\r\n]~', '', $result['parsed_text']);
$output = preg_replace('~[\r\n]~', '', $output);
$this->assertEquals($result['parsed_text'], $output);
$this->assertEquals($output, $result['parsed_text']);
return true;
}
@ -74,4 +74,143 @@ class mod_wiki_wikiparser_test extends basic_testcase {
$i++;
}
}
/**
* Check that headings with special characters work as expected in HTML, Creole and NWiki markup.
*
* For each markup, and for each kind of heading (ampersand/HTML entity, non-ASCII
* characters, URL), the following is verified:
*
* - The heading itself is well displayed,
* - The TOC heading is well displayed,
* - The edit link points to the right page,
* - The links properly work with get_section.
*/
public function test_special_headings() {
// First testing HTML markup.
// Test section name using HTML entities.
$input = '<h1>Code &amp; Test</h1>';
$output = '<h1><a name="toc-1"></a>Code &amp; Test<a href="edit.php?pageid=&amp;section=Code+%26amp%3B+Test" '.
'class="wiki_edit_section">[edit]</a></h1>' . "\n";
$toc = '<div class="wiki-toc"><p class="wiki-toc-title">Table of contents</p><p class="wiki-toc-section-1 '.
'wiki-toc-section">1. <a href="#toc-1">Code &amp; Test<a href="edit.php?pageid=&amp;section=Code+%26amp%3B+'.
'Test" class="wiki_edit_section">[edit]</a></a></p></div>';
$section = wiki_parser_proxy::get_section($input, 'html', 'Code &amp; Test');
$actual = wiki_parser_proxy::parse($input, 'html');
$this->assertEquals($output, $actual['parsed_text']);
$this->assertEquals($toc, $actual['toc']);
$this->assertNotEquals(false, $section);
// Test section name using non-ASCII characters.
$input = '<h1>Another áéíóúç€ test</h1>';
$output = '<h1><a name="toc-1"></a>Another áéíóúç€ test<a href="edit.php?pageid=&amp;section=Another+%C'.
'3%A1%C3%A9%C3%AD%C3%B3%C3%BA%C3%A7%E2%82%AC+test" class="wiki_edit_section">[edit]</a></h1>' . "\n";
$toc = '<div class="wiki-toc"><p class="wiki-toc-title">Table of contents</p><p class="wiki-toc-section-1 '.
'wiki-toc-section">1. <a href="#toc-1">Another áéíóúç€ test<a href="edit.php?pageid=&amp;section=Another+%C'.
'3%A1%C3%A9%C3%AD%C3%B3%C3%BA%C3%A7%E2%82%AC+test" class="wiki_edit_section">[edit]</a></a></p></div>';
$section = wiki_parser_proxy::get_section($input, 'html', 'Another áéíóúç€ test');
$actual = wiki_parser_proxy::parse($input, 'html');
$this->assertEquals($output, $actual['parsed_text']);
$this->assertEquals($toc, $actual['toc']);
$this->assertNotEquals(false, $section);
// Test section name with a URL.
// Expected: the URL is rendered as a link inside the heading, but stays plain text
// in the TOC entry and is URL-encoded in the edit link.
$input = '<h1>Another http://moodle.org test</h1>';
$output = '<h1><a name="toc-1"></a>Another <a href="http://moodle.org">http://moodle.org</a> test<a href="edit.php'.
'?pageid=&amp;section=Another+http%3A%2F%2Fmoodle.org+test" class="wiki_edit_section">[edit]</a></h1>' . "\n";
$toc = '<div class="wiki-toc"><p class="wiki-toc-title">Table of contents</p><p class="wiki-toc-section-1 '.
'wiki-toc-section">1. <a href="#toc-1">Another http://moodle.org test<a href="edit.php?pageid=&amp;section='.
'Another+http%3A%2F%2Fmoodle.org+test" class="wiki_edit_section">[edit]</a></a></p></div>';
$section = wiki_parser_proxy::get_section($input, 'html', 'Another http://moodle.org test');
$actual = wiki_parser_proxy::parse($input, 'html', array(
'link_callback' => '/mod/wiki/locallib.php:wiki_parser_link'
));
$this->assertEquals($output, $actual['parsed_text']);
$this->assertEquals($toc, $actual['toc']);
$this->assertNotEquals(false, $section);
// Now going to test Creole markup.
// Note that Creole uses links to the escaped version of the section.
// Test section name using HTML entities.
$input = '= Code & Test =';
$output = '<h1><a name="toc-1"></a>Code &amp; Test<a href="edit.php?pageid=&amp;section=Code+%26amp%3B+Test" '.
'class="wiki_edit_section">[edit]</a></h1>' . "\n";
$toc = '<div class="wiki-toc"><p class="wiki-toc-title">Table of contents</p><p class="wiki-toc-section-1 '.
'wiki-toc-section">1. <a href="#toc-1">Code &amp; Test<a href="edit.php?pageid=&amp;section=Code+%26amp%3B+'.
'Test" class="wiki_edit_section">[edit]</a></a></p></div>';
$section = wiki_parser_proxy::get_section($input, 'creole', 'Code &amp; Test');
$actual = wiki_parser_proxy::parse($input, 'creole');
$this->assertEquals($output, $actual['parsed_text']);
$this->assertEquals($toc, $actual['toc']);
$this->assertNotEquals(false, $section);
// Test section name using non-ASCII characters.
$input = '= Another áéíóúç€ test =';
$output = '<h1><a name="toc-1"></a>Another áéíóúç€ test<a href="edit.php?pageid=&amp;section=Another+%C'.
'3%A1%C3%A9%C3%AD%C3%B3%C3%BA%C3%A7%E2%82%AC+test" class="wiki_edit_section">[edit]</a></h1>' . "\n";
$toc = '<div class="wiki-toc"><p class="wiki-toc-title">Table of contents</p><p class="wiki-toc-section-1 '.
'wiki-toc-section">1. <a href="#toc-1">Another áéíóúç€ test<a href="edit.php?pageid=&amp;section=Another+%C'.
'3%A1%C3%A9%C3%AD%C3%B3%C3%BA%C3%A7%E2%82%AC+test" class="wiki_edit_section">[edit]</a></a></p></div>';
$section = wiki_parser_proxy::get_section($input, 'creole', 'Another áéíóúç€ test');
$actual = wiki_parser_proxy::parse($input, 'creole');
$this->assertEquals($output, $actual['parsed_text']);
$this->assertEquals($toc, $actual['toc']);
$this->assertNotEquals(false, $section);
// Test section name with a URL, creole does not support linking links in a heading.
$input = '= Another http://moodle.org test =';
$output = '<h1><a name="toc-1"></a>Another http://moodle.org test<a href="edit.php'.
'?pageid=&amp;section=Another+http%3A%2F%2Fmoodle.org+test" class="wiki_edit_section">[edit]</a></h1>' . "\n";
$toc = '<div class="wiki-toc"><p class="wiki-toc-title">Table of contents</p><p class="wiki-toc-section-1 '.
'wiki-toc-section">1. <a href="#toc-1">Another http://moodle.org test<a href="edit.php?pageid=&amp;section='.
'Another+http%3A%2F%2Fmoodle.org+test" class="wiki_edit_section">[edit]</a></a></p></div>';
$section = wiki_parser_proxy::get_section($input, 'creole', 'Another http://moodle.org test');
$actual = wiki_parser_proxy::parse($input, 'creole');
$this->assertEquals($output, $actual['parsed_text']);
$this->assertEquals($toc, $actual['toc']);
$this->assertNotEquals(false, $section);
// Now going to test NWiki markup.
// Note that, unlike Creole, NWiki is expected to keep the section name unescaped
// in both the rendered heading and the edit link.
// Test section name using an HTML special character (ampersand).
$input = '= Code & Test =';
$output = '<h1><a name="toc-1"></a>Code & Test<a href="edit.php?pageid=&amp;section=Code+%26+Test" '.
'class="wiki_edit_section">[edit]</a></h1>' . "\n";
$toc = '<div class="wiki-toc"><p class="wiki-toc-title">Table of contents</p><p class="wiki-toc-section-1 '.
'wiki-toc-section">1. <a href="#toc-1">Code & Test<a href="edit.php?pageid=&amp;section=Code+%26+'.
'Test" class="wiki_edit_section">[edit]</a></a></p></div>';
$section = wiki_parser_proxy::get_section($input, 'nwiki', 'Code & Test');
$actual = wiki_parser_proxy::parse($input, 'nwiki');
$this->assertEquals($output, $actual['parsed_text']);
$this->assertEquals($toc, $actual['toc']);
$this->assertNotEquals(false, $section);
// Test section name using non-ASCII characters.
$input = '= Another áéíóúç€ test =';
$output = '<h1><a name="toc-1"></a>Another áéíóúç€ test<a href="edit.php?pageid=&amp;section=Another+%C'.
'3%A1%C3%A9%C3%AD%C3%B3%C3%BA%C3%A7%E2%82%AC+test" class="wiki_edit_section">[edit]</a></h1>' . "\n";
$toc = '<div class="wiki-toc"><p class="wiki-toc-title">Table of contents</p><p class="wiki-toc-section-1 '.
'wiki-toc-section">1. <a href="#toc-1">Another áéíóúç€ test<a href="edit.php?pageid=&amp;section=Another+%C'.
'3%A1%C3%A9%C3%AD%C3%B3%C3%BA%C3%A7%E2%82%AC+test" class="wiki_edit_section">[edit]</a></a></p></div>';
$section = wiki_parser_proxy::get_section($input, 'nwiki', 'Another áéíóúç€ test');
$actual = wiki_parser_proxy::parse($input, 'nwiki');
$this->assertEquals($output, $actual['parsed_text']);
$this->assertEquals($toc, $actual['toc']);
$this->assertNotEquals(false, $section);
// Test section name with a URL, nwiki does not support linking links in a heading.
$input = '= Another http://moodle.org test =';
$output = '<h1><a name="toc-1"></a>Another http://moodle.org test<a href="edit.php'.
'?pageid=&amp;section=Another+http%3A%2F%2Fmoodle.org+test" class="wiki_edit_section">[edit]</a></h1>' . "\n";
$toc = '<div class="wiki-toc"><p class="wiki-toc-title">Table of contents</p><p class="wiki-toc-section-1 '.
'wiki-toc-section">1. <a href="#toc-1">Another http://moodle.org test<a href="edit.php?pageid=&amp;section='.
'Another+http%3A%2F%2Fmoodle.org+test" class="wiki_edit_section">[edit]</a></a></p></div>';
$section = wiki_parser_proxy::get_section($input, 'nwiki', 'Another http://moodle.org test');
$actual = wiki_parser_proxy::parse($input, 'nwiki');
$this->assertEquals($output, $actual['parsed_text']);
$this->assertEquals($toc, $actual['toc']);
$this->assertNotEquals(false, $section);
}
}