mirror of
https://github.com/moodle/moodle.git
synced 2025-04-30 05:18:55 +02:00
Now that PHP has support for named parameters, and we can use them in Moodle, we should ditch `$options` arrays and use first-class, documented, parameters. Whilst this may seem scary, dumb, overwhelming, please note that you do not need to supply all args, for example, to change the last parameter of `format_text` you no longer need to do this: return \core\container::get(\core\formatting::class)->format_text( $text, FORMAT_MOODLE, $context, false, null, true, true, true, false, false, true, ); Instead you can do: return \core\container::get(\core\formatting::class)->format_text( $text, FORMAT_MOODLE, $context, allowid: true, ); Or better still: return \core\container::get(\core\formatting::class)->format_text( text: $text, format: FORMAT_MOODLE, context: $context, allowid: true, ); This means that we can get defaults in the function signature, improves our typing, and allows for deprecation and changes to options. It also sets us up for success in the future.
438 lines
16 KiB
PHP
438 lines
16 KiB
PHP
<?php
|
|
// This file is part of Moodle - http://moodle.org/
|
|
//
|
|
// Moodle is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU General Public License as published by
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// Moodle is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU General Public License
|
|
// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
namespace core;
|
|
|
|
/**
|
|
* Content formatting methods for Moodle.
|
|
*
|
|
* @package core
|
|
* @copyright 2023 Andrew Lyons <andrew@nicols.co.uk>
|
|
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
|
|
*/
|
|
class formatting {
|
|
/** @var bool|null Instance-level override for forceclean; null means "fall back to $CFG->forceclean" */
protected ?bool $forceclean = null;

/** @var bool|null Instance-level override for striptags; null means "fall back to $CFG->formatstringstriptags" */
protected ?bool $striptags = null;

/** @var bool|null Instance-level override for filterall; null means "fall back to $CFG->filterall" */
protected ?bool $filterall = null;

/** @var string[] Cache of format_string() results, keyed by an md5 hash of the call arguments */
protected $formatstringcache = [];
|
|
|
|
/**
 * Given a simple string, this function returns the string
 * processed by enabled string filters if $CFG->filterall is enabled
 *
 * This function should be used to print short strings (non html) that
 * need filter processing e.g. activity titles, post subjects,
 * glossary concepts.
 *
 * Results are cached per instance. The cache key covers every argument, the current language,
 * and the effective filterall/striptags settings, so changing a setting never returns stale output.
 *
 * @param null|string $string The string to be filtered. Should be plain text, expect
 *                            possibly for multilang tags.
 * @param bool $striplinks To strip any link in the result text.
 * @param null|context $context The context used for formatting
 * @param bool $filter Whether to apply filters
 * @param bool $escape Whether to escape ampersands
 * @return string
 * @throws \coding_exception If no context was supplied and none can be found on $PAGE.
 */
public function format_string(
    ?string $string,
    bool $striplinks = true,
    ?context $context = null,
    bool $filter = true,
    bool $escape = true,
): string {
    global $PAGE;

    if ($string === '' || is_null($string)) {
        // No need to do any filters and cleaning.
        return '';
    }

    if (!$this->should_filter_string()) {
        return strip_tags($string);
    }

    if (count($this->formatstringcache) > 2000) {
        // This number might need some tuning to limit memory usage in cron.
        $this->formatstringcache = [];
    }

    if ($context === null) {
        // Fallback to $PAGE->context this may be problematic in CLI and other non-standard pages :-(.
        // In future we may want to add debugging here.
        $context = $PAGE->context;
        if (!$context) {
            // We did not find any context? weird.
            throw new \coding_exception(
                'Unable to identify context for format_string()',
            );
        }
    }

    // Resolve the settings once: they influence the output, so they must be part of the cache key too.
    $filterall = !empty($this->get_filterall());
    $striptags = !empty($this->get_striptags());

    // Calculate md5.
    $cachekeys = [
        $string,
        $striplinks,
        $context->id,
        $escape,
        current_language(),
        $filter,
        (int) $filterall,
        (int) $striptags,
    ];
    $md5 = md5(implode('<+>', $cachekeys));

    // Fetch from cache if possible.
    if (array_key_exists($md5, $this->formatstringcache)) {
        return $this->formatstringcache[$md5];
    }

    // First replace all ampersands not followed by html entity code
    // Regular expression moved to its own method for easier unit testing.
    if ($escape) {
        $string = replace_ampersands_not_followed_by_entity($string);
    }

    if ($filterall && $filter) {
        $filtermanager = \filter_manager::instance();
        $filtermanager->setup_page_for_filters($PAGE, $context); // Setup global stuff filters may have.
        $string = $filtermanager->filter_string($string, $context);
    }

    // If the site requires it, strip ALL tags from this string.
    if ($striptags) {
        if ($escape) {
            // Any angle bracket that survives strip_tags() is escaped to its HTML entity (lt / gt).
            $string = str_replace(['<', '>'], ['&lt;', '&gt;'], strip_tags($string));
        } else {
            $string = strip_tags($string);
        }
    } else {
        // Otherwise strip just links if that is required (default).
        if ($striplinks) {
            // Strip links in string.
            $string = strip_links($string);
        }
        $string = clean_text($string);
    }

    // Store to cache.
    $this->formatstringcache[$md5] = $string;

    return $string;
}
|
|
|
|
/**
 * Given text in a variety of format codings, this function returns the text as safe HTML.
 *
 * This function should mainly be used for long strings like posts,
 * answers, glossary items etc. For short strings {@link format_string()}.
 *
 * @param null|string $text The text to be formatted. This is raw text originally from user input.
 * @param string $format Identifier of the text format to be used
 *                       [FORMAT_MOODLE, FORMAT_HTML, FORMAT_PLAIN, FORMAT_MARKDOWN]
 * @param null|context $context The context used for filtering
 * @param bool $trusted If true the string won't be cleaned.
 *                      Note: FORMAT_MARKDOWN does not support trusted text.
 * @param null|bool $clean If true the string will be cleaned.
 *                         Note: This parameter is overridden if the text is trusted
 * @param bool $filter If true the string will be run through applicable filters as well.
 * @param bool $para If true then the returned string will be wrapped in div tags.
 * @param bool $newlines If true then lines newline breaks will be converted to HTML newline breaks.
 * @param bool $overflowdiv If set to true the formatted text will be encased in a div
 * @param bool $blanktarget If true all <a> tags will have target="_blank" added unless target is explicitly specified.
 * @param bool $allowid If true then id attributes will not be removed, even when using htmlpurifier.
 * @return string
 * @throws \coding_exception If $format is not one of the supported FORMAT_* identifiers.
 */
public function format_text(
    ?string $text,
    string $format = FORMAT_MOODLE,
    ?context $context = null,
    bool $trusted = false,
    ?bool $clean = null,
    bool $filter = true,
    bool $para = true,
    bool $newlines = true,
    bool $overflowdiv = false,
    bool $blanktarget = false,
    bool $allowid = false,
): string {
    global $CFG, $PAGE;

    if ($text === '' || is_null($text)) {
        // No need to do any filters and cleaning.
        return '';
    }

    if ($format == FORMAT_MARKDOWN) {
        // Markdown format cannot be trusted in trusttext areas,
        // because we do not know how to sanitise it before editing.
        $trusted = false;
    }
    if ($clean === null) {
        // No cleaning if text trusted and clean not specified.
        $clean = !($trusted && trusttext_active());
    }
    if (!empty($this->get_forceclean())) {
        // Whatever the caller claims, the admin wants all content cleaned anyway.
        $clean = true;
    }

    // Calculate best context.
    if (!$this->should_filter_string()) {
        // Do not filter anything during installation or before upgrade completes.
        $context = null;
    } else if ($context === null) {
        // Fallback to $PAGE->context this may be problematic in CLI and other non-standard pages.
        // In future we may want to add debugging here.
        $context = $PAGE->context;
    }

    if (!$context) {
        // Either install/upgrade or something has gone really wrong because context does not exist (yet?).
        $filter = false;
    }

    if ($filter) {
        $filtermanager = \filter_manager::instance();
        $filtermanager->setup_page_for_filters($PAGE, $context); // Setup global stuff filters may have.
        $filteroptions = [
            'originalformat' => $format,
            'noclean' => !$clean,
        ];
    } else {
        $filtermanager = new \null_filter_manager();
        $filteroptions = [];
    }

    switch ($format) {
        case FORMAT_HTML:
            $filteroptions['stage'] = 'pre_format';
            $text = $filtermanager->filter_text($text, $context, $filteroptions);
            // Text is already in HTML format, so just continue to the next filtering stage.
            $filteroptions['stage'] = 'pre_clean';
            $text = $filtermanager->filter_text($text, $context, $filteroptions);
            if ($clean) {
                $text = clean_text($text, FORMAT_HTML, [
                    'allowid' => $allowid,
                ]);
            }
            $filteroptions['stage'] = 'post_clean';
            $text = $filtermanager->filter_text($text, $context, $filteroptions);
            break;

        case FORMAT_PLAIN:
            $text = s($text); // Cleans dangerous JS.
            $text = rebuildnolinktag($text);
            // Preserve runs of spaces: every pair of spaces becomes a non-breaking space entity plus a normal space.
            $text = str_replace('  ', '&nbsp; ', $text);
            $text = nl2br($text);
            break;

        case FORMAT_MARKDOWN:
            $filteroptions['stage'] = 'pre_format';
            $text = $filtermanager->filter_text($text, $context, $filteroptions);
            $text = markdown_to_html($text);
            $filteroptions['stage'] = 'pre_clean';
            $text = $filtermanager->filter_text($text, $context, $filteroptions);
            if ($clean) {
                $text = clean_text($text, FORMAT_HTML, [
                    'allowid' => $allowid,
                ]);
            }
            $filteroptions['stage'] = 'post_clean';
            $text = $filtermanager->filter_text($text, $context, $filteroptions);
            break;

        case FORMAT_MOODLE:
            $filteroptions['stage'] = 'pre_format';
            $text = $filtermanager->filter_text($text, $context, $filteroptions);
            $text = text_to_html($text, null, $para, $newlines);
            $filteroptions['stage'] = 'pre_clean';
            $text = $filtermanager->filter_text($text, $context, $filteroptions);
            if ($clean) {
                $text = clean_text($text, FORMAT_HTML, [
                    'allowid' => $allowid,
                ]);
            }
            $filteroptions['stage'] = 'post_clean';
            $text = $filtermanager->filter_text($text, $context, $filteroptions);
            break;

        default: // Any format identifier that is not handled above.
            throw new \coding_exception("Unknown format passed to format_text: {$format}");
    }

    if ($filter) {
        // At this point there should not be any draftfile links any more,
        // this happens when developers forget to post process the text.
        // The only potential problem is that somebody might try to format
        // the text before storing into database which would be itself big bug.
        $text = str_replace("\"$CFG->wwwroot/draftfile.php", "\"$CFG->wwwroot/brokenfile.php#", $text);

        if ($CFG->debugdeveloper && str_contains($text, '@@PLUGINFILE@@/')) {
            debugging(
                'Before calling format_text(), the content must be processed with file_rewrite_pluginfile_urls()',
                DEBUG_DEVELOPER
            );
        }
    }

    if ($overflowdiv) {
        $text = \html_writer::tag('div', $text, ['class' => 'no-overflow']);
    }

    if ($blanktarget) {
        $domdoc = new \DOMDocument();
        libxml_use_internal_errors(true);
        $domdoc->loadHTML('<?xml version="1.0" encoding="UTF-8" ?>' . $text);
        libxml_clear_errors();
        foreach ($domdoc->getElementsByTagName('a') as $link) {
            // Leave links alone when they explicitly target something other than _blank.
            if ($link->hasAttribute('target') && !str_contains($link->getAttribute('target'), '_blank')) {
                continue;
            }
            $link->setAttribute('target', '_blank');
            if (!str_contains($link->getAttribute('rel'), 'noreferrer')) {
                $link->setAttribute('rel', trim($link->getAttribute('rel') . ' noreferrer'));
            }
        }

        // This regex is nasty and I don't like it. The correct way to solve this is by loading the HTML like so:
        // $domdoc->loadHTML($text, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD); however it seems like some libxml
        // versions don't work properly and end up leaving <html><body>, so I'm forced to use
        // this regex to remove those tags as a preventive measure.
        $text = trim(preg_replace(
            '~<(?:!DOCTYPE|/?(?:html|body))[^>]*>\s*~i',
            '',
            $domdoc->saveHTML($domdoc->documentElement),
        ));
    }

    return $text;
}
|
|
|
|
/**
 * Override the forceclean setting for this formatter instance.
 *
 * @param bool $forceclean The value to use instead of the site configuration
 * @return self This instance, to allow call chaining
 */
public function set_forceclean(bool $forceclean): self {
    $this->forceclean = $forceclean;
    return $this;
}
|
|
|
|
/**
 * Get the current forceclean value.
 *
 * The instance-level value wins when it has been set; otherwise CFG->forceclean is used,
 * and when that is not set either the result is false.
 *
 * @return bool
 */
public function get_forceclean(): bool {
    global $CFG;

    // The null-coalescing chain mirrors isset(): unset values are skipped without raising errors.
    return $this->forceclean ?? $CFG->forceclean ?? false;
}
|
|
|
|
/**
 * Override the striptags setting for this formatter instance.
 *
 * @param bool $striptags The value to use instead of the site configuration
 * @return self This instance, to allow call chaining
 */
public function set_striptags(bool $striptags): self {
    $this->striptags = $striptags;
    return $this;
}
|
|
|
|
/**
 * Get the current striptags value.
 *
 * Reverts to CFG->formatstringstriptags if not set on this instance.
 * A missing or empty site setting is treated as disabled.
 *
 * @return bool
 */
public function get_striptags(): bool {
    global $CFG;

    if (isset($this->striptags)) {
        return $this->striptags;
    }

    // Use !empty() so that an undefined setting yields false rather than a warning and a
    // TypeError caused by returning null from a bool method.
    return !empty($CFG->formatstringstriptags);
}
|
|
|
|
/**
 * Override the filterall setting for this formatter instance.
 *
 * @param bool $filterall The value to use instead of the site configuration
 * @return self This instance, to allow call chaining
 */
public function set_filterall(bool $filterall): self {
    $this->filterall = $filterall;
    return $this;
}
|
|
|
|
/**
 * Get the current filterall value.
 *
 * Reverts to CFG->filterall if not set on this instance.
 * A missing or empty site setting is treated as disabled.
 *
 * @return bool
 */
public function get_filterall(): bool {
    global $CFG;

    if (isset($this->filterall)) {
        return $this->filterall;
    }

    // Use !empty() so that an undefined setting yields false rather than a warning and a
    // TypeError caused by returning null from a bool method.
    return !empty($CFG->filterall);
}
|
|
|
|
/**
 * Decide whether strings may be filtered at all.
 *
 * Filtering is skipped during initial install, or while upgrading from a really old version of Moodle.
 *
 * @return bool False while installing or before the upgrade has completed, true otherwise
 */
protected function should_filter_string(): bool {
    global $CFG;

    // Do not filter anything during installation or before upgrade completes.
    $notready = empty($CFG->version) || $CFG->version < 2013051400 || during_initial_install();

    return !$notready;
}
|
|
}
|