mirror of
https://github.com/moodle/moodle.git
synced 2025-04-21 16:32:18 +02:00
Merge branch 'w13_MDL-32109_m23_fastpurify' of git://github.com/skodak/moodle
This commit is contained in:
commit
5eee9f7ade
@ -193,6 +193,63 @@ class htmlpurifier_test extends UnitTestCase {
|
||||
$text = 'x<form></form>x';
|
||||
$this->assertIdentical('xx', purify_html($text));
|
||||
}
|
||||
|
||||
/**
 * Test internal function used for clean_text() speedup.
 *
 * The first group of fixtures must be reported as not needing HTMLPurifier
 * and must also pass through purify_html() unchanged; the second group
 * must be reported as needing purification.
 */
function test_is_purify_html_necessary() {
    // First our shortcuts: texts that are already safe and valid.
    $text = "";
    $this->assertFalse(is_purify_html_necessary($text));
    $this->assertIdentical($text, purify_html($text));

    $text = "666";
    $this->assertFalse(is_purify_html_necessary($text));
    $this->assertIdentical($text, purify_html($text));

    $text = "abc\ndef \" ' ";
    $this->assertFalse(is_purify_html_necessary($text));
    $this->assertIdentical($text, purify_html($text));

    $text = "abc\n<p>def</p>efg<p>hij</p>";
    $this->assertFalse(is_purify_html_necessary($text));
    $this->assertIdentical($text, purify_html($text));

    $text = "<br />abc\n<p>def<em>efg</em><strong>hi<br />j</strong></p>";
    $this->assertFalse(is_purify_html_necessary($text));
    $this->assertIdentical($text, purify_html($text));

    // Now failures: texts that have to go through HTMLPurifier.

    // Any entity contains '&', which always requires normalisation.
    $text = "&nbsp;";
    $this->assertTrue(is_purify_html_necessary($text));

    $text = "Gin & Tonic";
    $this->assertTrue(is_purify_html_necessary($text));

    $text = "Gin > Tonic";
    $this->assertTrue(is_purify_html_necessary($text));

    $text = "Gin < Tonic";
    $this->assertTrue(is_purify_html_necessary($text));

    // Tags other than p, em, strong and br.
    $text = "<div>abc</div>";
    $this->assertTrue(is_purify_html_necessary($text));

    $text = "<span>abc</span>";
    $this->assertTrue(is_purify_html_necessary($text));

    // Whitelisted tag names, but not in the exact accepted form.
    $text = "<br>abc";
    $this->assertTrue(is_purify_html_necessary($text));

    $text = "<p class='xxx'>abc</p>";
    $this->assertTrue(is_purify_html_necessary($text));

    // Badly nested or unclosed markup.
    $text = "<p>abc<em></p></em>";
    $this->assertTrue(is_purify_html_necessary($text));

    $text = "<p>abc";
    $this->assertTrue(is_purify_html_necessary($text));
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -189,5 +189,63 @@ class core_htmlpurifier_testcase extends basic_testcase {
|
||||
$text = 'x<form></form>x';
|
||||
$this->assertSame('xx', purify_html($text));
|
||||
}
|
||||
|
||||
/**
 * Test internal function used for clean_text() speedup.
 *
 * The first group of fixtures must be reported as not needing HTMLPurifier
 * and must also pass through purify_html() unchanged; the second group
 * must be reported as needing purification.
 *
 * @return void
 */
public function test_is_purify_html_necessary() {
    // First our shortcuts: texts that are already safe and valid.
    $text = "";
    $this->assertFalse(is_purify_html_necessary($text));
    $this->assertSame($text, purify_html($text));

    $text = "666";
    $this->assertFalse(is_purify_html_necessary($text));
    $this->assertSame($text, purify_html($text));

    $text = "abc\ndef \" ' ";
    $this->assertFalse(is_purify_html_necessary($text));
    $this->assertSame($text, purify_html($text));

    $text = "abc\n<p>def</p>efg<p>hij</p>";
    $this->assertFalse(is_purify_html_necessary($text));
    $this->assertSame($text, purify_html($text));

    $text = "<br />abc\n<p>def<em>efg</em><strong>hi<br />j</strong></p>";
    $this->assertFalse(is_purify_html_necessary($text));
    $this->assertSame($text, purify_html($text));

    // Now failures: texts that have to go through HTMLPurifier.

    // Any entity contains '&', which always requires normalisation.
    $text = "&nbsp;";
    $this->assertTrue(is_purify_html_necessary($text));

    $text = "Gin & Tonic";
    $this->assertTrue(is_purify_html_necessary($text));

    $text = "Gin > Tonic";
    $this->assertTrue(is_purify_html_necessary($text));

    $text = "Gin < Tonic";
    $this->assertTrue(is_purify_html_necessary($text));

    // Tags other than p, em, strong and br.
    $text = "<div>abc</div>";
    $this->assertTrue(is_purify_html_necessary($text));

    $text = "<span>abc</span>";
    $this->assertTrue(is_purify_html_necessary($text));

    // Whitelisted tag names, but not in the exact accepted form.
    $text = "<br>abc";
    $this->assertTrue(is_purify_html_necessary($text));

    $text = "<p class='xxx'>abc</p>";
    $this->assertTrue(is_purify_html_necessary($text));

    // Badly nested or unclosed markup.
    $text = "<p>abc<em></p></em>";
    $this->assertTrue(is_purify_html_necessary($text));

    $text = "<p>abc";
    $this->assertTrue(is_purify_html_necessary($text));
}
|
||||
}
|
||||
|
||||
|
@ -1450,9 +1450,7 @@ function trusttext_active() {
|
||||
* @return string The cleaned up text
|
||||
*/
|
||||
function clean_text($text, $format = FORMAT_HTML, $options = array()) {
|
||||
if (empty($text) or is_numeric($text)) {
|
||||
return (string)$text;
|
||||
}
|
||||
$text = (string)$text;
|
||||
|
||||
if ($format != FORMAT_HTML and $format != FORMAT_HTML) {
|
||||
// TODO: we need to standardise cleanup of text when loading it into editor first
|
||||
@ -1463,7 +1461,9 @@ function clean_text($text, $format = FORMAT_HTML, $options = array()) {
|
||||
return $text;
|
||||
}
|
||||
|
||||
$text = purify_html($text, $options);
|
||||
if (is_purify_html_necessary($text)) {
|
||||
$text = purify_html($text, $options);
|
||||
}
|
||||
|
||||
// Originally we tried to neutralise some script events here, it was a wrong approach because
|
||||
// it was trivial to work around that (for example using style based XSS exploits).
|
||||
@ -1473,6 +1473,53 @@ function clean_text($text, $format = FORMAT_HTML, $options = array()) {
|
||||
return $text;
|
||||
}
|
||||
|
||||
/**
 * Is it necessary to use HTMLPurifier?
 *
 * Cheap pre-check: the text is escaped with htmlspecialchars() and then a
 * small whitelist of plain tags (<p>, <em>, <strong>, <br />) is converted
 * back. If the result is identical to the input, the input contained nothing
 * but plain text and those exact tags, so purification can be skipped.
 *
 * @private
 * @param string $text
 * @return bool false means html is safe and valid, true means use HTMLPurifier
 */
function is_purify_html_necessary($text) {
    if ($text === '') {
        return false;
    }

    // Plain integers cannot contain any markup.
    if ($text === (string)((int)$text)) {
        return false;
    }

    if (strpos($text, '&') !== false || preg_match('|<[^pesb/]|', $text)) {
        // We need to normalise entities, or tags other than p, em, strong and br are present.
        return true;
    }

    $altered = htmlspecialchars($text, ENT_NOQUOTES, 'UTF-8', true);
    if ($altered === $text) {
        // No < > or other special chars means this must be safe.
        return false;
    }

    // Let's try to convert back some safe html tags. $altered is escaped at this
    // point, so the patterns have to look for the escaped form of each tag.
    $altered = preg_replace('|&lt;p&gt;(.*?)&lt;/p&gt;|m', '<p>$1</p>', $altered);
    if ($altered === null) {
        // Regex engine failure - be conservative and purify.
        return true;
    }
    if ($altered === $text) {
        return false;
    }

    // The [^<>] class keeps already restored tags out of em/strong content,
    // which rejects badly nested markup such as <em></p></em>.
    $altered = preg_replace('|&lt;em&gt;([^<>]+?)&lt;/em&gt;|m', '<em>$1</em>', $altered);
    if ($altered === null) {
        return true;
    }
    if ($altered === $text) {
        return false;
    }

    $altered = preg_replace('|&lt;strong&gt;([^<>]+?)&lt;/strong&gt;|m', '<strong>$1</strong>', $altered);
    if ($altered === null) {
        return true;
    }
    if ($altered === $text) {
        return false;
    }

    // Only the exact XHTML form "<br />" is accepted.
    $altered = str_replace('&lt;br /&gt;', '<br />', $altered);
    if ($altered === $text) {
        return false;
    }

    // Something other than the whitelisted markup is left over.
    return true;
}
|
||||
|
||||
/**
|
||||
* KSES replacement cleaning function - uses HTML Purifier.
|
||||
*
|
||||
|
Loading…
x
Reference in New Issue
Block a user