1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-07-30 19:00:10 +02:00

Implement lang and xml:lang. Fixed a bunch of bugs too.

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@162 48356398-32a2-884e-a903-53898d9a118a
This commit is contained in:
Edward Z. Yang
2006-08-05 01:50:13 +00:00
parent 1945ddca5c
commit 8a23710405
11 changed files with 312 additions and 7 deletions

View File

@@ -5,7 +5,7 @@ class HTMLPurifier_AttrDef
{
function HTMLPurifier_AttrDef() {}
function validate() {
function validate($string, $config = null) {
trigger_error('Cannot call abstract function', E_USER_ERROR);
}

View File

@@ -0,0 +1,73 @@
<?php
require_once 'HTMLPurifier/AttrDef.php';
// built according to RFC 3066, which obsoleted RFC 1766
/**
 * Validates language tags as used by the lang and xml:lang attributes.
 * Built according to RFC 3066, which obsoleted RFC 1766.
 */
class HTMLPurifier_AttrDef_Lang extends HTMLPurifier_AttrDef
{

    /**
     * Validates and normalizes a language tag.
     *
     * The tag is split on '-' into subtags. The primary subtag must be
     * valid or the whole value is rejected; once an invalid later subtag is
     * met, everything from that subtag onwards is dropped and the valid
     * prefix is returned.
     *
     * @param $string Raw attribute value.
     * @param $config Unused here; accepted so the signature stays compatible
     *                with HTMLPurifier_AttrDef::validate($string, $config).
     * @return The normalized tag (subtags of two or more characters are
     *         lowercased), or false when the primary subtag is invalid.
     */
    function validate($string, $config = null) {

        $string = trim($string);
        if (!$string) return false;

        $subtags = explode('-', $string);
        $num_subtags = count($subtags);

        if ($num_subtags == 0) return false; // sanity check

        // process primary subtag : $subtags[0]
        $length = strlen($subtags[0]);
        switch ($length) {
            case 0:
                return false;
            case 1:
                // single-character primary subtags are reserved:
                // only 'x' (private use) and 'i' (IANA) are accepted
                if (! ($subtags[0] == 'x' || $subtags[0] == 'i') ) {
                    return false;
                }
                break;
            case 2:
            case 3:
                if (! ctype_alpha($subtags[0]) ) {
                    return false;
                }
                $subtags[0] = strtolower($subtags[0]);
                break;
            default:
                return false;
        }

        $new_string = $subtags[0];

        // process the remaining subtags, index 1 and up; stop at the first
        // invalid one and keep what has been accepted so far
        for ($i = 1; $i < $num_subtags; $i++) {
            $length = strlen($subtags[$i]);
            // the second subtag may not be a single character, while later
            // subtags may be
            $min_length = ($i == 1) ? 2 : 1;
            if ($length < $min_length || $length > 8 || !ctype_alnum($subtags[$i])) {
                return $new_string;
            }
            // strtolower() leaves digits and already-lowercase letters alone
            $new_string .= '-' . strtolower($subtags[$i]);
        }

        return $new_string;

    }

}
?>

View File

@@ -0,0 +1,14 @@
<?php
// AttrTransform = Attribute Transformation, when handling one attribute
// isn't enough
/**
 * Base class for attribute transformations. Subclasses are expected to
 * override transform(); calling it on this class raises an error.
 */
class HTMLPurifier_AttrTransform
{

    // PHP 4 style constructor; does nothing
    function HTMLPurifier_AttrTransform() {}

    /**
     * Placeholder for the transformation; always raises E_USER_ERROR.
     *
     * @param $token  Token to transform (ignored here).
     * @param $config Optional configuration (ignored here).
     */
    function transform($token, $config = null) {
        $message = 'Cannot call abstract function';
        trigger_error($message, E_USER_ERROR);
    }

}
?>

View File

@@ -0,0 +1,31 @@
<?php
require_once 'HTMLPurifier/AttrTransform.php';
/**
 * Keeps the lang and xml:lang attributes of a token in step with each
 * other. When both are present, xml:lang wins.
 */
class HTMLPurifier_AttrTransform_Lang extends HTMLPurifier_AttrTransform
{

    /**
     * Mirrors lang into xml:lang, or xml:lang into lang.
     *
     * @param $token  Token whose attributes array is inspected.
     * @param $config Unused here; accepted so the signature stays compatible
     *                with HTMLPurifier_AttrTransform::transform($token, $config).
     * @return The original token when neither attribute is set, otherwise a
     *         copy of the token with both attributes set.
     */
    function transform($token, $config = null) {

        $lang     = isset($token->attributes['lang']) ?
                    $token->attributes['lang'] : false;
        $xml_lang = isset($token->attributes['xml:lang']) ?
                    $token->attributes['xml:lang'] : false;

        // strict comparison on both: an attribute that is present but empty
        // (or "0") is still present and must be mirrored
        if ($lang === false && $xml_lang === false) return $token;

        $new_token = $token->copy();

        if ($lang !== false && $xml_lang === false) {
            $new_token->attributes['xml:lang'] = $lang;
        } elseif ($xml_lang !== false) {
            // xml:lang takes precedence and overwrites any existing lang
            $new_token->attributes['lang'] = $xml_lang;
        }

        return $new_token;

    }

}
?>

View File

@@ -45,6 +45,9 @@ class HTMLPurifier_Definition
// used solely by HTMLPurifier_Strategy_RemoveForeignElements
var $info_tag_transform = array();
// used solely by HTMLPurifier_Strategy_ValidateAttributes
var $info_attr_transform = array();
// WARNING! Prototype is not passed by reference, so in order to get
// a copy of the real one, you'll have to destroy your copy and
// use instance() to get it.
@@ -238,11 +241,22 @@ class HTMLPurifier_Definition
// which manually override these in their local definitions
$this->info_global_attr = array(
// core attrs
'id' => new HTMLPurifier_AttrDef_ID(),
'id' => new HTMLPurifier_AttrDef_ID(),
'class' => new HTMLPurifier_AttrDef_Class(),
'title' => new HTMLPurifier_AttrDef_Text(),
// i18n
'dir' => new HTMLPurifier_AttrDef_Enum(array('ltr','rtl'), false),
'dir' => new HTMLPurifier_AttrDef_Enum(array('ltr','rtl'), false),
'lang' => new HTMLPurifier_AttrDef_Lang(),
'xml:lang' => new HTMLPurifier_AttrDef_Lang(),
);
// required attribute stipulation handled in attribute transformation
$this->info['bdo']->attr = array();
$this->info['br']->attr = array(
'dir' => false,
'lang' => false,
'xml:lang' => false,
);
//////////////////////////////////////////////////////////////////////
@@ -275,9 +289,11 @@ class HTMLPurifier_Definition
// UNIMP : info[]->attr_transform : attribute transformations in elements
//////////////////////////////////////////////////////////////////////
// UNIMP : info_attr_transform : global attribute transform (for xml:lang)
// info_attr_transform : global attribute transformation that is
// unconditionally called. Good for transformations that have complex
// start conditions
// this might have bad implications for performance
$this->info_attr_transform[] = new HTMLPurifier_AttrTransform_Lang();
}

View File

@@ -26,11 +26,16 @@ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy
$d_defs = $this->definition->info_global_attr;
foreach ($tokens as $key => $token) {
if ($token->type !== 'start' && $token->type !== 'end') continue;
if ($token->type !== 'start' && $token->type !== 'empty') continue;
// DEFINITION CALL
$defs = $this->definition->info[$token->name]->attr;
// DEFINITION CALL
foreach ($this->definition->info_attr_transform as $transformer) {
$token = $transformer->transform($token);
}
$attr = $token->attributes;
$changed = false;
foreach ($attr as $attr_key => $value) {

View File

@@ -59,7 +59,9 @@ class HTMLPurifier_Token_Tag extends HTMLPurifier_Token // abstract
if (!isset($attributes[$new_key])) {
$attributes[$new_key] = $attributes[$key];
}
unset($attributes[$key]);
if ($new_key !== $key) {
unset($attributes[$key]);
}
}
}
$this->attributes = $attributes;
@@ -72,6 +74,9 @@ class HTMLPurifier_Token_Tag extends HTMLPurifier_Token // abstract
class HTMLPurifier_Token_Start extends HTMLPurifier_Token_Tag
{
var $type = 'start';
// Builds a new start token from this token's name and attributes.
function copy() {
    $duplicate = new HTMLPurifier_Token_Start($this->name, $this->attributes);
    return $duplicate;
}
}
/**
@@ -80,6 +85,9 @@ class HTMLPurifier_Token_Start extends HTMLPurifier_Token_Tag
class HTMLPurifier_Token_Empty extends HTMLPurifier_Token_Tag
{
var $type = 'empty';
// Builds a new empty-tag token from this token's name and attributes.
function copy() {
    $duplicate = new HTMLPurifier_Token_Empty($this->name, $this->attributes);
    return $duplicate;
}
}
/**
@@ -92,6 +100,9 @@ class HTMLPurifier_Token_Empty extends HTMLPurifier_Token_Tag
class HTMLPurifier_Token_End extends HTMLPurifier_Token_Tag
{
var $type = 'end';
// Builds a new end token from this token's name; no attributes are passed.
function copy() {
    $duplicate = new HTMLPurifier_Token_End($this->name);
    return $duplicate;
}
}
/**
@@ -120,6 +131,9 @@ class HTMLPurifier_Token_Text extends HTMLPurifier_Token
$this->data = $data;
$this->is_whitespace = ctype_space($data);
}
// Builds a new text token from this token's data.
function copy() {
    $duplicate = new HTMLPurifier_Token_Text($this->data);
    return $duplicate;
}
}
@@ -138,6 +152,9 @@ class HTMLPurifier_Token_Comment extends HTMLPurifier_Token
function HTMLPurifier_Token_Comment($data) {
$this->data = $data;
}
// Builds a new comment token from this token's data.
function copy() {
    $duplicate = new HTMLPurifier_Token_Comment($this->data);
    return $duplicate;
}
}
?>