MDL-25826 integrate HTMLPurifier 4.3.0 and improve performance

The new HTMLPurifier finally caches the schema properly, eliminating both extra CPU cycles and disk writes. The repeated directory-existence checks might cause problems on NFS shares, so the cache directory is now checked only when a purifier instance is first created.
This commit is contained in:
Petr Skoda 2011-04-09 10:27:51 +02:00
parent 88efb586b4
commit 7df50029b4
3 changed files with 15 additions and 13 deletions

View File

View File

@ -67,7 +67,7 @@
<location>htmlpurifier</location>
<name>HTML Purifier</name>
<license>LGPL</license>
<version>4.2.0</version>
<version>4.3.0</version>
<licenseversion>2.1+</licenseversion>
</library>
<library>

View File

@ -1509,20 +1509,21 @@ function clean_text($text, $format = FORMAT_HTML, $options = array()) {
function purify_html($text, $options = array()) {
global $CFG;
// this can not be done only once because we sometimes need to reset the cache
$cachedir = $CFG->dataroot.'/cache/htmlpurifier';
check_dir_exists($cachedir);
$type = !empty($options['allowid']) ? 'allowid' : 'normal';
static $purifiers = array();
if (empty($purifiers[$type])) {
// make sure the serializer dir exists, it should be fine if it disappears later during cache reset
$cachedir = $CFG->dataroot.'/cache/htmlpurifier';
check_dir_exists($cachedir);
require_once $CFG->libdir.'/htmlpurifier/HTMLPurifier.safe-includes.php';
$config = HTMLPurifier_Config::createDefault();
$config->set('HTML.DefinitionID', 'moodlehtml');
$config->set('HTML.DefinitionRev', 1);
$config->set('HTML.DefinitionRev', 2);
$config->set('Cache.SerializerPath', $cachedir);
//$config->set('Cache.SerializerPermission', $CFG->directorypermissions); // it would be nice to get this upstream
$config->set('Cache.SerializerPermissions', $CFG->directorypermissions);
$config->set('Core.NormalizeNewlines', false);
$config->set('Core.ConvertDocumentToFragment', true);
$config->set('Core.Encoding', 'UTF-8');
@ -1540,12 +1541,13 @@ function purify_html($text, $options = array()) {
$config->set('Attr.EnableID', true);
}
$def = $config->getHTMLDefinition(true);
$def->addElement('nolink', 'Block', 'Flow', array()); // skip our filters inside
$def->addElement('tex', 'Inline', 'Inline', array()); // tex syntax, equivalent to $$xx$$
$def->addElement('algebra', 'Inline', 'Inline', array()); // algebra syntax, equivalent to @@xx@@
$def->addElement('lang', 'Block', 'Flow', array(), array('lang'=>'CDATA')); // old anf future style multilang - only our hacked lang attribute
$def->addAttribute('span', 'xxxlang', 'CDATA'); // current problematic multilang
if ($def = $config->maybeGetRawHTMLDefinition()) {
$def->addElement('nolink', 'Block', 'Flow', array()); // skip our filters inside
$def->addElement('tex', 'Inline', 'Inline', array()); // tex syntax, equivalent to $$xx$$
$def->addElement('algebra', 'Inline', 'Inline', array()); // algebra syntax, equivalent to @@xx@@
$def->addElement('lang', 'Block', 'Flow', array(), array('lang'=>'CDATA')); // old and future style multilang - only our hacked lang attribute
$def->addAttribute('span', 'xxxlang', 'CDATA'); // current problematic multilang
}
$purifier = new HTMLPurifier($config);
$purifiers[$type] = $purifier;