From 6bb0c91a73855b741ddaafb51c4194544884e3a1 Mon Sep 17 00:00:00 2001 From: Andrew Nicols <andrew@nicols.co.uk> Date: Fri, 10 Nov 2023 14:56:31 +0800 Subject: [PATCH] MDL-80072 core: Move format_text to core\formatting --- lib/classes/formatting.php | 217 ++++++++++++++++ lib/tests/formatting_test.php | 450 ++++++++++++++++++++++++++++++++++ lib/upgrade.txt | 4 + lib/weblib.php | 190 ++------------ 4 files changed, 689 insertions(+), 172 deletions(-) diff --git a/lib/classes/formatting.php b/lib/classes/formatting.php index 048891986a1..1de6dc4210e 100644 --- a/lib/classes/formatting.php +++ b/lib/classes/formatting.php @@ -132,4 +132,221 @@ class formatting { return $string; } + + + /** + * Given text in a variety of format codings, this function returns the text as safe HTML. + * + * This function should mainly be used for long strings like posts, + * answers, glossary items etc. For short strings use {@link format_string()}. + * + * <pre> + * Options: + * trusted : If true the string won't be cleaned. Default false required noclean=true. + * noclean : If true the string won't be cleaned, unless $CFG->forceclean is set. Default false required trusted=true. + * nocache : If true the string will not be cached and will be formatted every call. Default false. + * filter : If true the string will be run through applicable filters as well. Default true. + * para : If true then the returned string will be wrapped in div tags. Default true. + * newlines : If true then newline breaks will be converted to HTML newline breaks. Default true. + * context : The context that will be used for filtering. + * overflowdiv : If set to true the formatted text will be encased in a div + * with the class no-overflow before being returned. Default false. + * allowid : If true then id attributes will not be removed, even when + * using htmlpurifier. Default false. + * blanktarget : If true all <a> tags will have target="_blank" added unless target is explicitly specified. 
+ * </pre> + * + * @staticvar array $croncache + * @param string $text The text to be formatted. This is raw text originally from user input. + * @param int $format Identifier of the text format to be used + * [FORMAT_MOODLE, FORMAT_HTML, FORMAT_PLAIN, FORMAT_MARKDOWN] + * @param stdClass|array $options text formatting options + * @param int $courseiddonotuse deprecated course id, use context option instead + * @return string + */ + public function format_text( + $text, + $format = FORMAT_MOODLE, + $options = null, + ) { + global $CFG, $DB, $PAGE; + + if ($text === '' || is_null($text)) { + // No need to do any filters and cleaning. + return ''; + } + + // Detach object, we can not modify it. + $options = (array)$options; + + if (!isset($options['trusted'])) { + $options['trusted'] = false; + } + if ($format == FORMAT_MARKDOWN) { + // Markdown format cannot be trusted in trusttext areas, + // because we do not know how to sanitise it before editing. + $options['trusted'] = false; + } + if (!isset($options['noclean'])) { + if ($options['trusted'] and trusttext_active()) { + // No cleaning if text trusted and noclean not specified. + $options['noclean'] = true; + } else { + $options['noclean'] = false; + } + } + if (!empty($CFG->forceclean)) { + // Whatever the caller claims, the admin wants all content cleaned anyway. + $options['noclean'] = false; + } + if (!isset($options['nocache'])) { + $options['nocache'] = false; + } + if (!isset($options['filter'])) { + $options['filter'] = true; + } + if (!isset($options['para'])) { + $options['para'] = true; + } + if (!isset($options['newlines'])) { + $options['newlines'] = true; + } + if (!isset($options['overflowdiv'])) { + $options['overflowdiv'] = false; + } + $options['blanktarget'] = !empty($options['blanktarget']); + + // Calculate best context. + if (empty($CFG->version) or $CFG->version < 2013051400 or during_initial_install()) { + // Do not filter anything during installation or before upgrade completes. 
+ $context = null; + } else if (isset($options['context'])) { // First by explicit passed context option. + if (is_object($options['context'])) { + $context = $options['context']; + } else { + $context = context::instance_by_id($options['context']); + } + } else { + // Fallback to $PAGE->context this may be problematic in CLI and other non-standard pages :-(. + $context = $PAGE->context; + } + + if (!$context) { + // Either install/upgrade or something has gone really wrong because context does not exist (yet?). + $options['nocache'] = true; + $options['filter'] = false; + } + + if ($options['filter']) { + $filtermanager = \filter_manager::instance(); + $filtermanager->setup_page_for_filters($PAGE, $context); // Setup global stuff filters may have. + $filteroptions = array( + 'originalformat' => $format, + 'noclean' => $options['noclean'], + ); + } else { + $filtermanager = new \null_filter_manager(); + $filteroptions = array(); + } + + switch ($format) { + case FORMAT_HTML: + $filteroptions['stage'] = 'pre_format'; + $text = $filtermanager->filter_text($text, $context, $filteroptions); + // Text is already in HTML format, so just continue to the next filtering stage. + $filteroptions['stage'] = 'pre_clean'; + $text = $filtermanager->filter_text($text, $context, $filteroptions); + if (!$options['noclean']) { + $text = clean_text($text, FORMAT_HTML, $options); + } + $filteroptions['stage'] = 'post_clean'; + $text = $filtermanager->filter_text($text, $context, $filteroptions); + break; + + case FORMAT_PLAIN: + $text = s($text); // Cleans dangerous JS. + $text = rebuildnolinktag($text); + $text = str_replace(' ', ' ', $text); + $text = nl2br($text); + break; + + case FORMAT_WIKI: + // This format is deprecated. + $text = '<p>NOTICE: Wiki-like formatting has been removed from Moodle. You should not be seeing + this message as all texts should have been converted to Markdown format instead. 
+ Please post a bug report to http://moodle.org/bugs with information about where you + saw this message.</p>' . s($text); + break; + + case FORMAT_MARKDOWN: + $filteroptions['stage'] = 'pre_format'; + $text = $filtermanager->filter_text($text, $context, $filteroptions); + $text = markdown_to_html($text); + $filteroptions['stage'] = 'pre_clean'; + $text = $filtermanager->filter_text($text, $context, $filteroptions); + if (!$options['noclean']) { + $text = clean_text($text, FORMAT_HTML, $options); + } + $filteroptions['stage'] = 'post_clean'; + $text = $filtermanager->filter_text($text, $context, $filteroptions); + break; + + default: // FORMAT_MOODLE or anything else. + $filteroptions['stage'] = 'pre_format'; + $text = $filtermanager->filter_text($text, $context, $filteroptions); + $text = text_to_html($text, null, $options['para'], $options['newlines']); + $filteroptions['stage'] = 'pre_clean'; + $text = $filtermanager->filter_text($text, $context, $filteroptions); + if (!$options['noclean']) { + $text = clean_text($text, FORMAT_HTML, $options); + } + $filteroptions['stage'] = 'post_clean'; + $text = $filtermanager->filter_text($text, $context, $filteroptions); + break; + } + if ($options['filter']) { + // At this point there should not be any draftfile links any more, + // this happens when developers forget to post process the text. + // The only potential problem is that somebody might try to format + // the text before storing into database which would be itself big bug.. 
+ $text = str_replace("\"$CFG->wwwroot/draftfile.php", "\"$CFG->wwwroot/brokenfile.php#", $text); + + if ($CFG->debugdeveloper) { + if (strpos($text, '@@PLUGINFILE@@/') !== false) { + debugging( + 'Before calling format_text(), the content must be processed with file_rewrite_pluginfile_urls()', + DEBUG_DEVELOPER + ); + } + } + } + + if (!empty($options['overflowdiv'])) { + $text = \html_writer::tag('div', $text, array('class' => 'no-overflow')); + } + + if ($options['blanktarget']) { + $domdoc = new \DOMDocument(); + libxml_use_internal_errors(true); + $domdoc->loadHTML('<?xml version="1.0" encoding="UTF-8" ?>' . $text); + libxml_clear_errors(); + foreach ($domdoc->getElementsByTagName('a') as $link) { + if ($link->hasAttribute('target') && strpos($link->getAttribute('target'), '_blank') === false) { + continue; + } + $link->setAttribute('target', '_blank'); + if (strpos($link->getAttribute('rel'), 'noreferrer') === false) { + $link->setAttribute('rel', trim($link->getAttribute('rel') . ' noreferrer')); + } + } + + // This regex is nasty and I don't like it. The correct way to solve this is by loading the HTML like so: + // $domdoc->loadHTML($text, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD); however it seems like some libxml + // versions don't work properly and end up leaving <html><body>, so I'm forced to use + // this regex to remove those tags as a preventive measure. 
+ $text = trim(preg_replace('~<(?:!DOCTYPE|/?(?:html|body))[^>]*>\s*~i', '', $domdoc->saveHTML($domdoc->documentElement))); + } + + return $text; + } } diff --git a/lib/tests/formatting_test.php b/lib/tests/formatting_test.php index 35d5f954df5..33536468634 100644 --- a/lib/tests/formatting_test.php +++ b/lib/tests/formatting_test.php @@ -147,4 +147,454 @@ class formatting_test extends \advanced_testcase { $cachedresult = $formatting->format_string($rawstring, $striplinks, $options); $this->assertMatchesRegularExpression("/$expectedfilter/", $cachedresult); } + + /** + * Test trust option of format_text(). + * + * @covers ::format_text + * @dataProvider format_text_trusted_provider + */ + public function test_format_text_trusted( + $expected, + int $enabletrusttext, + mixed $input, + // Yes... FORMAT_ constants are strings of ints. + string $format, + array $options = [], + ): void { + global $CFG; + $this->resetAfterTest(); + + $CFG->enabletrusttext = $enabletrusttext; + + $formatter = new formatting(); + $this->assertEquals( + $expected, + $formatter->format_text($input, $format, $options), + ); + } + + public static function format_text_trusted_provider(): array { + $text = "lala <object>xx</object>"; + return [ + [ + s($text), + 0, + $text, + FORMAT_PLAIN, + ['trusted' => true], + ], + [ + "<p>lala xx</p>\n", + 0, + $text, + FORMAT_MARKDOWN, + ['trusted' => true], + ], + [ + '<div class="text_to_html">lala xx</div>', + 0, + $text, + FORMAT_MOODLE, + ['trusted' => true], + ], + [ + 'lala xx', + 0, + $text, + FORMAT_HTML, + ['trusted' => true], + ], + + [ + s($text), + 0, + $text, + FORMAT_PLAIN, + ['trusted' => false], + ], + [ + "<p>lala xx</p>\n", + 0, + $text, + FORMAT_MARKDOWN, + ['trusted' => false], + ], + [ + '<div class="text_to_html">lala xx</div>', + 0, + $text, + FORMAT_MOODLE, + ['trusted' => false], + ], + [ + 'lala xx', + 0, + $text, + FORMAT_HTML, + ['trusted' => false], + ], + + [ + s($text), + 1, + $text, + FORMAT_PLAIN, + ['trusted' => true], + 
], + [ + "<p>lala xx</p>\n", + 1, + $text, + FORMAT_MARKDOWN, + ['trusted' => true], + ], + [ + '<div class="text_to_html">lala <object>xx</object></div>', + 1, + $text, + FORMAT_MOODLE, + ['trusted' => true], + ], + [ + 'lala <object>xx</object>', + 1, + $text, + FORMAT_HTML, + ['trusted' => true], + ], + + [ + s($text), + 1, + $text, + FORMAT_PLAIN, + ['trusted' => false], + ], + [ + "<p>lala xx</p>\n", + 1, + $text, + FORMAT_MARKDOWN, + ['trusted' => false], + ], + [ + '<div class="text_to_html">lala xx</div>', + 1, + $text, + FORMAT_MOODLE, + ['trusted' => false], + ], + [ + 'lala xx', + 1, + $text, + FORMAT_HTML, + ['trusted' => false], + ], + + [ + "<p>lala <object>xx</object></p>\n", + 1, + $text, + FORMAT_MARKDOWN, + ['trusted' => true, 'noclean' => true], + ], + [ + "<p>lala <object>xx</object></p>\n", + 1, + $text, + FORMAT_MARKDOWN, + ['trusted' => false, 'noclean' => true], + ], + ]; + } + + public function test_format_text_format_html(): void { + $this->resetAfterTest(); + $formatter = new formatting(); + + filter_set_global_state('emoticon', TEXTFILTER_ON); + $this->assertMatchesRegularExpression( + '~^<p><img class="icon emoticon" alt="smile" title="smile" ' . + 'src="https://www.example.com/moodle/theme/image.php/boost/core/1/s/smiley" /></p>$~', + $formatter->format_text('<p>:-)</p>', FORMAT_HTML) + ); + } + + public function test_format_text_format_html_no_filters(): void { + $this->resetAfterTest(); + $formatter = new formatting(); + + filter_set_global_state('emoticon', TEXTFILTER_ON); + $this->assertEquals( + '<p>:-)</p>', + $formatter->format_text('<p>:-)</p>', FORMAT_HTML, array('filter' => false)) + ); + } + + public function test_format_text_format_plain(): void { + // Note FORMAT_PLAIN does not filter ever, no matter we ask for filtering. 
+ $this->resetAfterTest(); + $formatter = new formatting(); + + filter_set_global_state('emoticon', TEXTFILTER_ON); + $this->assertEquals( + ':-)', + $formatter->format_text(':-)', FORMAT_PLAIN) + ); + } + + public function test_format_text_format_plain_no_filters(): void { + $this->resetAfterTest(); + $formatter = new formatting(); + + filter_set_global_state('emoticon', TEXTFILTER_ON); + $this->assertEquals( + ':-)', + $formatter->format_text(':-)', FORMAT_PLAIN, array('filter' => false)) + ); + } + + public function test_format_text_format_markdown(): void { + $this->resetAfterTest(); + $formatter = new formatting(); + + filter_set_global_state('emoticon', TEXTFILTER_ON); + $this->assertMatchesRegularExpression( + '~^<p><em><img class="icon emoticon" alt="smile" title="smile" ' . + 'src="https://www.example.com/moodle/theme/image.php/boost/core/1/s/smiley" />' . + '</em></p>\n$~', + $formatter->format_text('*:-)*', FORMAT_MARKDOWN) + ); + } + + public function test_format_text_format_markdown_nofilter(): void { + $this->resetAfterTest(); + $formatter = new formatting(); + + filter_set_global_state('emoticon', TEXTFILTER_ON); + $this->assertEquals( + "<p><em>:-)</em></p>\n", + $formatter->format_text('*:-)*', FORMAT_MARKDOWN, array('filter' => false)) + ); + } + + public function test_format_text_format_moodle(): void { + $this->resetAfterTest(); + $formatter = new formatting(); + + filter_set_global_state('emoticon', TEXTFILTER_ON); + $this->assertMatchesRegularExpression( + '~^<div class="text_to_html"><p>' . + '<img class="icon emoticon" alt="smile" title="smile" ' . 
+ 'src="https://www.example.com/moodle/theme/image.php/boost/core/1/s/smiley" /></p></div>$~', + $formatter->format_text('<p>:-)</p>', FORMAT_MOODLE) + ); + } + + public function test_format_text_format_moodle_no_filters(): void { + $this->resetAfterTest(); + $formatter = new formatting(); + + filter_set_global_state('emoticon', TEXTFILTER_ON); + $this->assertEquals( + '<div class="text_to_html"><p>:-)</p></div>', + $formatter->format_text('<p>:-)</p>', FORMAT_MOODLE, array('filter' => false)) + ); + } + + /** + * Make sure that nolink tags and spans prevent linking in filters that support it. + */ + public function test_format_text_nolink(): void { + global $CFG; + $this->resetAfterTest(); + $formatter = new formatting(); + + filter_set_global_state('activitynames', TEXTFILTER_ON); + + $course = $this->getDataGenerator()->create_course(); + $context = \context_course::instance($course->id); + $page = $this->getDataGenerator()->create_module( + 'page', + ['course' => $course->id, 'name' => 'Test 1'] + ); + $cm = get_coursemodule_from_instance('page', $page->id, $page->course, false, MUST_EXIST); + $pageurl = $CFG->wwwroot . '/mod/page/view.php?id=' . $cm->id; + + $this->assertSame( + '<p>Read <a class="autolink" title="Test 1" href="' . $pageurl . '">Test 1</a>.</p>', + $formatter->format_text('<p>Read Test 1.</p>', FORMAT_HTML, ['context' => $context]) + ); + + $this->assertSame( + '<p>Read <a class="autolink" title="Test 1" href="' . $pageurl . 
'">Test 1</a>.</p>', + $formatter->format_text('<p>Read Test 1.</p>', FORMAT_HTML, ['context' => $context, 'noclean' => true]) + ); + + $this->assertSame( + '<p>Read Test 1.</p>', + $formatter->format_text('<p><nolink>Read Test 1.</nolink></p>', FORMAT_HTML, ['context' => $context, 'noclean' => false]) + ); + + $this->assertSame( + '<p>Read Test 1.</p>', + $formatter->format_text('<p><nolink>Read Test 1.</nolink></p>', FORMAT_HTML, ['context' => $context, 'noclean' => true]) + ); + + $this->assertSame( + '<p><span class="nolink">Read Test 1.</span></p>', + $formatter->format_text('<p><span class="nolink">Read Test 1.</span></p>', FORMAT_HTML, ['context' => $context]) + ); + } + + public function test_format_text_overflowdiv(): void { + $formatter = new formatting(); + + $this->assertEquals( + '<div class="no-overflow"><p>Hello world</p></div>', + $formatter->format_text('<p>Hello world</p>', FORMAT_HTML, array('overflowdiv' => true)) + ); + } + + /** + * Test adding blank target attribute to links + * + * @dataProvider format_text_blanktarget_testcases + * @param string $link The link to add target="_blank" to + * @param string $expected The expected filter value + */ + public function test_format_text_blanktarget($link, $expected): void { + $formatter = new formatting(); + $actual = $formatter->format_text($link, FORMAT_MOODLE, array('blanktarget' => true, 'filter' => false, 'noclean' => true)); + $this->assertEquals($expected, $actual); + } + + /** + * Data provider for the test_format_text_blanktarget testcase + * + * @return array of testcases + */ + public function format_text_blanktarget_testcases() { + return [ + 'Simple link' => [ + '<a href="https://www.youtube.com/watch?v=JeimE8Wz6e4">Hey, that\'s pretty good!</a>', + '<div class="text_to_html"><a href="https://www.youtube.com/watch?v=JeimE8Wz6e4" target="_blank"' . 
+ ' rel="noreferrer">Hey, that\'s pretty good!</a></div>' + ], + 'Link with rel' => [ + '<a href="https://www.youtube.com/watch?v=JeimE8Wz6e4" rel="nofollow">Hey, that\'s pretty good!</a>', + '<div class="text_to_html"><a href="https://www.youtube.com/watch?v=JeimE8Wz6e4" rel="nofollow noreferrer"' . + ' target="_blank">Hey, that\'s pretty good!</a></div>' + ], + 'Link with rel noreferrer' => [ + '<a href="https://www.youtube.com/watch?v=JeimE8Wz6e4" rel="noreferrer">Hey, that\'s pretty good!</a>', + '<div class="text_to_html"><a href="https://www.youtube.com/watch?v=JeimE8Wz6e4" rel="noreferrer"' . + ' target="_blank">Hey, that\'s pretty good!</a></div>' + ], + 'Link with target' => [ + '<a href="https://www.youtube.com/watch?v=JeimE8Wz6e4" target="_self">Hey, that\'s pretty good!</a>', + '<div class="text_to_html"><a href="https://www.youtube.com/watch?v=JeimE8Wz6e4" target="_self">' . + 'Hey, that\'s pretty good!</a></div>' + ], + 'Link with target blank' => [ + '<a href="https://www.youtube.com/watch?v=JeimE8Wz6e4" target="_blank">Hey, that\'s pretty good!</a>', + '<div class="text_to_html"><a href="https://www.youtube.com/watch?v=JeimE8Wz6e4" target="_blank"' . + ' rel="noreferrer">Hey, that\'s pretty good!</a></div>' + ], + 'Link with Frank\'s casket inscription' => [ + '<a href="https://en.wikipedia.org/wiki/Franks_Casket">ᚠᛁᛋᚳ᛫ᚠᛚᚩᛞᚢ᛫ᚪᚻᚩᚠᚩᚾᚠᛖᚱᚷ ᛖᚾᛒᛖᚱᛁᚷ ᚹᚪᚱᚦᚷᚪ᛬ᛋᚱᛁᚳᚷᚱᚩᚱᚾᚦᚫᚱᚻᛖᚩᚾᚷᚱᛖᚢᛏᚷᛁᛋᚹᚩᛗ ᚻ' . + 'ᚱᚩᚾᚫᛋᛒᚪᚾ ᛗᚫᚷᛁᚠᛁᛋᚳ᛫ᚠᛚᚩᛞᚢ᛫ᚪᚻᚩᚠᚩᚾᚠᛖᚱᚷ ᛖᚾᛒᛖᚱᛁᚷ ᚹᚪᚱᚦᚷᚪ᛬ᛋᚱᛁᚳᚷᚱᚩᚱᚾᚦᚫᚱᚻᛖᚩᚾᚷᚱᛖᚢᛏᚷᛁᛋᚹᚩᛗ ᚻᚱᚩᚾᚫᛋᛒᚪᚾ ᛗᚫᚷᛁ</a>', + '<div class="text_to_html"><a href="https://en.wikipedia.org/wiki/Franks_Casket" target="_blank" ' . + 'rel="noreferrer">ᚠᛁᛋᚳ᛫ᚠᛚᚩᛞᚢ᛫ᚪᚻᚩᚠᚩᚾᚠᛖᚱᚷ ᛖᚾᛒᛖᚱᛁᚷ ᚹᚪᚱᚦᚷᚪ᛬ᛋᚱᛁᚳᚷᚱᚩᚱᚾᚦᚫᚱᚻᛖᚩᚾᚷᚱᛖᚢᛏᚷᛁᛋᚹᚩᛗ ᚻᚱᚩᚾᚫᛋᛒᚪᚾ ᛗᚫᚷᛁᚠᛁᛋᚳ᛫ᚠᛚᚩᛞᚢ᛫ᚪᚻᚩᚠᚩᚾᚠᛖᚱᚷ ᛖᚾ' . 
+ 'ᛒᛖᚱᛁᚷ ᚹᚪᚱᚦᚷᚪ᛬ᛋᚱᛁᚳᚷᚱᚩᚱᚾᚦᚫᚱᚻᛖᚩᚾᚷᚱᛖᚢᛏᚷᛁᛋᚹᚩᛗ ᚻᚱᚩᚾᚫᛋᛒᚪᚾ ᛗᚫᚷᛁ</a></div>' + ], + 'No link' => [ + 'Some very boring text written with the Latin script', + '<div class="text_to_html">Some very boring text written with the Latin script</div>' + ], + 'No link with Thror\'s map runes' => [ + 'ᛋᛏᚫᚾᛞ ᛒᚣ ᚦᛖ ᚷᚱᛖᚣ ᛋᛏᚩᚾᛖ ᚻᚹᛁᛚᛖ ᚦᛖ ᚦᚱᚢᛋᚻ ᚾᚩᚳᛋ ᚫᚾᛞ ᚦᛖ ᛋᛖᛏᛏᛁᚾᚷ ᛋᚢᚾ ᚹᛁᚦ ᚦᛖ ᛚᚫᛋᛏ ᛚᛁᚷᚻᛏ ᚩᚠ ᛞᚢᚱᛁᚾᛋ ᛞᚫᚣ ᚹᛁᛚᛚ ᛋᚻᛁᚾᛖ ᚢᛈᚩᚾ ᚦᛖ ᚳᛖᚣᚻᚩᛚᛖ', + '<div class="text_to_html">ᛋᛏᚫᚾᛞ ᛒᚣ ᚦᛖ ᚷᚱᛖᚣ ᛋᛏᚩᚾᛖ ᚻᚹᛁᛚᛖ ᚦᛖ ᚦᚱᚢᛋᚻ ᚾᚩᚳᛋ ᚫᚾᛞ ᚦᛖ ᛋᛖᛏᛏᛁᚾᚷ ᛋᚢᚾ ᚹᛁᚦ ᚦᛖ ᛚᚫᛋᛏ ᛚᛁᚷᚻᛏ ᚩᚠ ᛞᚢᚱᛁᚾᛋ ᛞᚫᚣ ᚹ' . + 'ᛁᛚᛚ ᛋᚻᛁᚾᛖ ᚢᛈᚩᚾ ᚦᛖ ᚳᛖᚣᚻᚩᛚᛖ</div>' + ] + ]; + } + + /** + * Test ability to force cleaning of otherwise non-cleaned content. + * + * @dataProvider format_text_cleaning_testcases + * + * @param string $input Input text + * @param string $nocleaned Expected output of format_text() with noclean=true + * @param string $cleaned Expected output of format_text() with noclean=false + */ + public function test_format_text_cleaning($input, $nocleaned, $cleaned): void { + global $CFG; + $this->resetAfterTest(); + $formatter = new formatting(); + + $CFG->forceclean = false; + $actual = $formatter->format_text($input, FORMAT_HTML, ['filter' => false, 'noclean' => false]); + $this->assertEquals($cleaned, $actual); + + $CFG->forceclean = true; + $actual = $formatter->format_text($input, FORMAT_HTML, ['filter' => false, 'noclean' => false]); + $this->assertEquals($cleaned, $actual); + + $CFG->forceclean = false; + $actual = $formatter->format_text($input, FORMAT_HTML, ['filter' => false, 'noclean' => true]); + $this->assertEquals($nocleaned, $actual); + + $CFG->forceclean = true; + $actual = $formatter->format_text($input, FORMAT_HTML, ['filter' => false, 'noclean' => true]); + $this->assertEquals($cleaned, $actual); + } + + /** + * Data provider for the test_format_text_cleaning testcase + * + * @return array of testcases (string)testcasename => [(string)input, (string)nocleaned, (string)cleaned] + */ + public static function 
format_text_cleaning_testcases(): array { + return [ + 'JavaScript' => [ + 'Hello <script type="text/javascript">alert("XSS");</script> world', + 'Hello <script type="text/javascript">alert("XSS");</script> world', + 'Hello world', + ], + 'Inline frames' => [ + 'Let us go phishing! <iframe src="https://1.2.3.4/google.com"></iframe>', + 'Let us go phishing! <iframe src="https://1.2.3.4/google.com"></iframe>', + 'Let us go phishing! ', + ], + 'Malformed A tags' => [ + '<a onmouseover="alert(document.cookie)">xxs link</a>', + '<a onmouseover="alert(document.cookie)">xxs link</a>', + '<a>xxs link</a>', + ], + 'Malformed IMG tags' => [ + '<IMG """><SCRIPT>alert("XSS")</SCRIPT>">', + '<IMG """><SCRIPT>alert("XSS")</SCRIPT>">', + '">', + ], + 'On error alert' => [ + '<IMG SRC=/ onerror="alert(String.fromCharCode(88,83,83))"></img>', + '<IMG SRC=/ onerror="alert(String.fromCharCode(88,83,83))"></img>', + '<img src="/" alt="" />', + ], + 'IMG onerror and javascript alert encode' => [ + '<img src=x onerror="javascSS')">', + '<img src=x onerror="javascSS')">', + '<img src="x" alt="x" />', + ], + 'DIV background-image' => [ + '<DIV STYLE="background-image: url(javascript:alert(\'XSS\'))">', + '<DIV STYLE="background-image: url(javascript:alert(\'XSS\'))">', + '<div></div>', + ], + ]; + } } diff --git a/lib/upgrade.txt b/lib/upgrade.txt index b5fd18365c3..d840ae02f48 100644 --- a/lib/upgrade.txt +++ b/lib/upgrade.txt @@ -42,6 +42,10 @@ information provided here is intended especially for developers. * \action_menu_link::$instance has been deprecated as it is no longer used. * The `format_string()` method has moved to `\core\formatting::format_string()`. The old method will be maintained, but new code should use the new method with first-class parameters. +* The `format_text()` method has moved to `\core\formatting::format_text()`. + The old method will be maintained, but new code should use the new method with first-class parameters. 
+* The fourth parameter to `format_text` now emits a deprecation notice. + It was originally deprecated in Moodle 2.0. === 4.3 === diff --git a/lib/weblib.php b/lib/weblib.php index 49fd74283fe..b4d217b9d8c 100644 --- a/lib/weblib.php +++ b/lib/weblib.php @@ -1261,11 +1261,13 @@ function format_text_menu() { * @return string */ function format_text($text, $format = FORMAT_MOODLE, $options = null, $courseiddonotuse = null) { - global $CFG, $DB, $PAGE; + global $CFG; - if ($text === '' || is_null($text)) { - // No need to do any filters and cleaning. - return ''; + // Manually include the formatting class for now until after the release after 4.5 LTS. + require_once("{$CFG->libdir}/classes/formatting.php"); + + if ($options) { + $options = (array) $options; } if ($options instanceof \core\context) { @@ -1279,180 +1281,24 @@ function format_text($text, $format = FORMAT_MOODLE, $options = null, $courseidd $options = ['context' => $options]; } - // Detach object, we can not modify it. - $options = (array)$options; - - if (!isset($options['trusted'])) { - $options['trusted'] = false; - } - if ($format == FORMAT_MARKDOWN) { - // Markdown format cannot be trusted in trusttext areas, - // because we do not know how to sanitise it before editing. - $options['trusted'] = false; - } - if (!isset($options['noclean'])) { - if ($options['trusted'] and trusttext_active()) { - // No cleaning if text trusted and noclean not specified. - $options['noclean'] = true; - } else { - $options['noclean'] = false; - } - } - if (!empty($CFG->forceclean)) { - // Whatever the caller claims, the admin wants all content cleaned anyway. 
- $options['noclean'] = false; - } - if (!isset($options['nocache'])) { - $options['nocache'] = false; - } - if (!isset($options['filter'])) { - $options['filter'] = true; - } - if (!isset($options['para'])) { - $options['para'] = true; - } - if (!isset($options['newlines'])) { - $options['newlines'] = true; - } - if (!isset($options['overflowdiv'])) { - $options['overflowdiv'] = false; - } - $options['blanktarget'] = !empty($options['blanktarget']); - - // Calculate best context. - if (empty($CFG->version) or $CFG->version < 2013051400 or during_initial_install()) { + if (empty($CFG->version) || $CFG->version < 2013051400 || during_initial_install()) { // Do not filter anything during installation or before upgrade completes. - $context = null; - - } else if (isset($options['context'])) { // First by explicit passed context option. - if (is_object($options['context'])) { - $context = $options['context']; - } else { - $context = context::instance_by_id($options['context']); - } + } else if ($options && isset($options['context'])) { // First by explicit passed context option. + // Do not do anything. } else if ($courseiddonotuse) { // Legacy courseid. - $context = context_course::instance($courseiddonotuse); - } else { - // Fallback to $PAGE->context this may be problematic in CLI and other non-standard pages :-(. - $context = $PAGE->context; - } - - if (!$context) { - // Either install/upgrade or something has gone really wrong because context does not exist (yet?). - $options['nocache'] = true; - $options['filter'] = false; - } - - if ($options['filter']) { - $filtermanager = filter_manager::instance(); - $filtermanager->setup_page_for_filters($PAGE, $context); // Setup global stuff filters may have. 
- $filteroptions = array( - 'originalformat' => $format, - 'noclean' => $options['noclean'], + $options['context'] = \core\context\course::instance($courseiddonotuse); + debugging( + "Passing a courseid to format_text() is deprecated, please pass a context instead.", + DEBUG_DEVELOPER, ); - } else { - $filtermanager = new null_filter_manager(); - $filteroptions = array(); } - switch ($format) { - case FORMAT_HTML: - $filteroptions['stage'] = 'pre_format'; - $text = $filtermanager->filter_text($text, $context, $filteroptions); - // Text is already in HTML format, so just continue to the next filtering stage. - $filteroptions['stage'] = 'pre_clean'; - $text = $filtermanager->filter_text($text, $context, $filteroptions); - if (!$options['noclean']) { - $text = clean_text($text, FORMAT_HTML, $options); - } - $filteroptions['stage'] = 'post_clean'; - $text = $filtermanager->filter_text($text, $context, $filteroptions); - break; - - case FORMAT_PLAIN: - $text = s($text); // Cleans dangerous JS. - $text = rebuildnolinktag($text); - $text = str_replace(' ', ' ', $text); - $text = nl2br($text); - break; - - case FORMAT_WIKI: - // This format is deprecated. - $text = '<p>NOTICE: Wiki-like formatting has been removed from Moodle. You should not be seeing - this message as all texts should have been converted to Markdown format instead. 
- Please post a bug report to http://moodle.org/bugs with information about where you - saw this message.</p>'.s($text); - break; - - case FORMAT_MARKDOWN: - $filteroptions['stage'] = 'pre_format'; - $text = $filtermanager->filter_text($text, $context, $filteroptions); - $text = markdown_to_html($text); - $filteroptions['stage'] = 'pre_clean'; - $text = $filtermanager->filter_text($text, $context, $filteroptions); - if (!$options['noclean']) { - $text = clean_text($text, FORMAT_HTML, $options); - } - $filteroptions['stage'] = 'post_clean'; - $text = $filtermanager->filter_text($text, $context, $filteroptions); - break; - - default: // FORMAT_MOODLE or anything else. - $filteroptions['stage'] = 'pre_format'; - $text = $filtermanager->filter_text($text, $context, $filteroptions); - $text = text_to_html($text, null, $options['para'], $options['newlines']); - $filteroptions['stage'] = 'pre_clean'; - $text = $filtermanager->filter_text($text, $context, $filteroptions); - if (!$options['noclean']) { - $text = clean_text($text, FORMAT_HTML, $options); - } - $filteroptions['stage'] = 'post_clean'; - $text = $filtermanager->filter_text($text, $context, $filteroptions); - break; - } - if ($options['filter']) { - // At this point there should not be any draftfile links any more, - // this happens when developers forget to post process the text. - // The only potential problem is that somebody might try to format - // the text before storing into database which would be itself big bug.. 
- $text = str_replace("\"$CFG->wwwroot/draftfile.php", "\"$CFG->wwwroot/brokenfile.php#", $text); - - if ($CFG->debugdeveloper) { - if (strpos($text, '@@PLUGINFILE@@/') !== false) { - debugging('Before calling format_text(), the content must be processed with file_rewrite_pluginfile_urls()', - DEBUG_DEVELOPER); - } - } - } - - if (!empty($options['overflowdiv'])) { - $text = html_writer::tag('div', $text, array('class' => 'no-overflow')); - } - - if ($options['blanktarget']) { - $domdoc = new DOMDocument(); - libxml_use_internal_errors(true); - $domdoc->loadHTML('<?xml version="1.0" encoding="UTF-8" ?>' . $text); - libxml_clear_errors(); - foreach ($domdoc->getElementsByTagName('a') as $link) { - if ($link->hasAttribute('target') && strpos($link->getAttribute('target'), '_blank') === false) { - continue; - } - $link->setAttribute('target', '_blank'); - if (strpos($link->getAttribute('rel'), 'noreferrer') === false) { - $link->setAttribute('rel', trim($link->getAttribute('rel') . ' noreferrer')); - } - } - - // This regex is nasty and I don't like it. The correct way to solve this is by loading the HTML like so: - // $domdoc->loadHTML($text, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD); however it seems like some libxml - // versions don't work properly and end up leaving <html><body>, so I'm forced to use - // this regex to remove those tags as a preventive measure. - $text = trim(preg_replace('~<(?:!DOCTYPE|/?(?:html|body))[^>]*>\s*~i', '', $domdoc->saveHTML($domdoc->documentElement))); - } - - return $text; + return \core\di::get(\core\formatting::class)->format_text( + $text, + $format, + $options, + ); } /**