From 7df50029b447957d52227c9a8019538eb8e8c68e Mon Sep 17 00:00:00 2001 From: Petr Skoda Date: Sat, 9 Apr 2011 10:27:51 +0200 Subject: [PATCH] MDL-25826 integrate HTMLPurifier 4.3.0 and improve performance The new HTMLPurifier finally caches the schema properly eliminating both extra CPU cycles and disk writes. The repeated dir exists tests might cause problems on NFS shares. --- lib/simpletest/testpurifier.php | 0 lib/thirdpartylibs.xml | 2 +- lib/weblib.php | 26 ++++++++++++++------------ 3 files changed, 15 insertions(+), 13 deletions(-) create mode 100644 lib/simpletest/testpurifier.php diff --git a/lib/simpletest/testpurifier.php b/lib/simpletest/testpurifier.php new file mode 100644 index 00000000000..e69de29bb2d diff --git a/lib/thirdpartylibs.xml b/lib/thirdpartylibs.xml index eb8ec851090..94a349e45e6 100644 --- a/lib/thirdpartylibs.xml +++ b/lib/thirdpartylibs.xml @@ -67,7 +67,7 @@ htmlpurifier HTML Purifier LGPL - 4.2.0 + 4.3.0 2.1+ diff --git a/lib/weblib.php b/lib/weblib.php index cb65a20d9de..9e1b939943e 100644 --- a/lib/weblib.php +++ b/lib/weblib.php @@ -1509,20 +1509,21 @@ function clean_text($text, $format = FORMAT_HTML, $options = array()) { function purify_html($text, $options = array()) { global $CFG; - // this can not be done only once because we sometimes need to reset the cache - $cachedir = $CFG->dataroot.'/cache/htmlpurifier'; - check_dir_exists($cachedir); - $type = !empty($options['allowid']) ? 'allowid' : 'normal'; static $purifiers = array(); if (empty($purifiers[$type])) { + + // make sure the serializer dir exists, it should be fine if it disappears later during cache reset + $cachedir = $CFG->dataroot.'/cache/htmlpurifier'; + check_dir_exists($cachedir); + require_once $CFG->libdir.'/htmlpurifier/HTMLPurifier.safe-includes.php'; $config = HTMLPurifier_Config::createDefault(); $config->set('HTML.DefinitionID', 'moodlehtml'); - $config->set('HTML.DefinitionRev', 1); + $config->set('HTML.DefinitionRev', 2); $config->set('Cache.SerializerPath', $cachedir); - //$config->set('Cache.SerializerPermission', $CFG->directorypermissions); // it would be nice to get this upstream + $config->set('Cache.SerializerPermissions', $CFG->directorypermissions); $config->set('Core.NormalizeNewlines', false); $config->set('Core.ConvertDocumentToFragment', true); $config->set('Core.Encoding', 'UTF-8'); @@ -1540,12 +1541,13 @@ function purify_html($text, $options = array()) { $config->set('Attr.EnableID', true); } - $def = $config->getHTMLDefinition(true); - $def->addElement('nolink', 'Block', 'Flow', array()); // skip our filters inside - $def->addElement('tex', 'Inline', 'Inline', array()); // tex syntax, equivalent to $$xx$$ - $def->addElement('algebra', 'Inline', 'Inline', array()); // algebra syntax, equivalent to @@xx@@ - $def->addElement('lang', 'Block', 'Flow', array(), array('lang'=>'CDATA')); // old anf future style multilang - only our hacked lang attribute - $def->addAttribute('span', 'xxxlang', 'CDATA'); // current problematic multilang + if ($def = $config->maybeGetRawHTMLDefinition()) { + $def->addElement('nolink', 'Block', 'Flow', array()); // skip our filters inside + $def->addElement('tex', 'Inline', 'Inline', array()); // tex syntax, equivalent to $$xx$$ + $def->addElement('algebra', 'Inline', 'Inline', array()); // algebra syntax, equivalent to @@xx@@ + $def->addElement('lang', 'Block', 'Flow', array(), array('lang'=>'CDATA')); // old and future style multilang - only our hacked lang attribute + $def->addAttribute('span', 'xxxlang', 'CDATA'); // current problematic multilang + } $purifier = new HTMLPurifier($config); $purifiers[$type] = $purifier;