1
0
mirror of https://github.com/ezyang/htmlpurifier.git synced 2025-08-04 21:28:06 +02:00

Compare commits

..

3 Commits

Author SHA1 Message Date
Edward Z. Yang
c768146e4d Gusev's proposed patch
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2013-10-12 21:24:38 -07:00
Edward Z. Yang
6e37ecd1c8 Make URI parsing algorithm more strict.
Thanks Michael Gusev <mgusev@sugarcrm.com> for contributing this patch.

Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2013-04-16 13:56:43 -07:00
Edward Z. Yang
20eff0a3a0 Fix NEWS entry.
Signed-off-by: Edward Z. Yang <ezyang@mit.edu>
2013-02-21 14:08:36 -08:00
9 changed files with 441 additions and 7 deletions

9
NEWS
View File

@@ -9,6 +9,11 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
. Internal change
==========================
4.6.0, unknown release date
# URI parsing algorithm was made more strict, so only prefixes which
look like schemes will actually be schemes. Thanks
Michael Gusev <mgusev@sugarcrm.com> for fixing.
4.5.0, released 2013-02-17
# Fix bug where stacked attribute transforms clobber each other;
this also means it's no longer possible to override attribute
@@ -20,10 +25,10 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier
! Permit underscores in font families
! Support for page-break-* CSS3 properties when proprietary properties
are enabled.
! New directive %Core.EnableExcludes; can be set to 'false' to turn off
! New directive %Core.DisableExcludes; can be set to 'true' to turn off
SGML excludes checking. If HTML Purifier is removing too much text
and you don't care about full standards compliance, try setting this to
'false'.
'true'.
- Use prepend for SPL autoloading on PHP 5.3 and later.
- Fix bug with nofollow transform when pre-existing rel exists.
- Fix bug where background:url() always gets lower-cased

View File

@@ -19,6 +19,8 @@
*/
require 'HTMLPurifier.php';
require 'HTMLPurifier/Array.php';
require 'HTMLPurifier/ArrayNode.php';
require 'HTMLPurifier/AttrCollections.php';
require 'HTMLPurifier/AttrDef.php';
require 'HTMLPurifier/AttrTransform.php';
@@ -36,6 +38,7 @@ require 'HTMLPurifier/DefinitionCache.php';
require 'HTMLPurifier/DefinitionCacheFactory.php';
require 'HTMLPurifier/Doctype.php';
require 'HTMLPurifier/DoctypeRegistry.php';
require 'HTMLPurifier/DoublyLinkedList.php';
require 'HTMLPurifier/ElementDef.php';
require 'HTMLPurifier/Encoder.php';
require 'HTMLPurifier/EntityLookup.php';

View File

@@ -13,6 +13,8 @@
$__dir = dirname(__FILE__);
require_once $__dir . '/HTMLPurifier.php';
require_once $__dir . '/HTMLPurifier/Array.php';
require_once $__dir . '/HTMLPurifier/ArrayNode.php';
require_once $__dir . '/HTMLPurifier/AttrCollections.php';
require_once $__dir . '/HTMLPurifier/AttrDef.php';
require_once $__dir . '/HTMLPurifier/AttrTransform.php';
@@ -30,6 +32,7 @@ require_once $__dir . '/HTMLPurifier/DefinitionCache.php';
require_once $__dir . '/HTMLPurifier/DefinitionCacheFactory.php';
require_once $__dir . '/HTMLPurifier/Doctype.php';
require_once $__dir . '/HTMLPurifier/DoctypeRegistry.php';
require_once $__dir . '/HTMLPurifier/DoublyLinkedList.php';
require_once $__dir . '/HTMLPurifier/ElementDef.php';
require_once $__dir . '/HTMLPurifier/Encoder.php';
require_once $__dir . '/HTMLPurifier/EntityLookup.php';

View File

@@ -0,0 +1,184 @@
<?php
/**
 * Doubly linked list of HTMLPurifier_ArrayNode items that can be read and
 * written with array syntax using zero-based positions.
 *
 * The list remembers the last position it resolved ($offset/$offsetItem), so
 * a lookup at the same or a later position resumes from there instead of
 * walking from the head again.
 *
 * The "#[\ReturnTypeWillChange]" lines are ordinary comments before PHP 8;
 * from PHP 8.1 on they silence the ArrayAccess return-type deprecation.
 */
class HTMLPurifier_Array implements ArrayAccess
{
    /**
     * First node of the list, or null when the list is empty.
     * @var HTMLPurifier_ArrayNode|null
     */
    public $head = null;

    /**
     * Number of nodes currently linked into the list.
     * @var int
     */
    protected $count = 0;

    /**
     * Position of the cached cursor node ($offsetItem).
     * @var int
     */
    protected $offset = 0;

    /**
     * Cached cursor node, or null when no usable cursor exists.
     * @var HTMLPurifier_ArrayNode|null
     */
    protected $offsetItem = null;

    /**
     * Builds the list from the values of $array, in iteration order.
     * Keys are ignored; positions always start at 0.
     *
     * @param array $array Initial values.
     */
    public function __construct(array $array = array())
    {
        $tail = null;
        // The node constructor binds its argument by reference, so iterate
        // by reference: each node then owns a distinct element slot rather
        // than all nodes sharing one loop variable.
        foreach ($array as &$v) {
            $node = new HTMLPurifier_ArrayNode($v);
            if ($tail instanceof HTMLPurifier_ArrayNode) {
                $node->prev = $tail;
                $tail->next = $node;
            } else {
                $this->head = $node;
            }
            $tail = $node;
        }
        unset($v);

        $this->count = count($array);
        $this->offset = 0;
        $this->offsetItem = $this->head;
    }

    /**
     * Locates the node at position $offset.
     *
     * On success the cursor is moved to the located node.
     *
     * @param int $offset Zero-based position to look for.
     * @return array 'correct' => true and 'value' => the node when the
     *               position exists; otherwise 'correct' => false and
     *               'value' => the node where the walk stopped (the last
     *               node), or null for an empty list.
     */
    protected function findIndex($offset)
    {
        if (!$this->head instanceof HTMLPurifier_ArrayNode) {
            return array(
                'correct' => false,
                'value' => null
            );
        }

        $current = $this->head;
        $index = 0;
        // Resume from the cursor when it is not past the requested position.
        if ($this->offset <= $offset && $this->offsetItem instanceof HTMLPurifier_ArrayNode) {
            $current = $this->offsetItem;
            $index = $this->offset;
        }

        // Offsets are compared loosely so numeric strings keep working.
        while ($current->next instanceof HTMLPurifier_ArrayNode && $index != $offset) {
            $current = $current->next;
            $index++;
        }

        if ($index == $offset) {
            $this->offset = $index;
            $this->offsetItem = $current;
            return array(
                'correct' => true,
                'value' => $current
            );
        }

        return array(
            'correct' => false,
            'value' => $current
        );
    }

    /**
     * Inserts $value so that it ends up at position $offset. When that
     * position does not exist the value is appended to the end, which is
     * also what happens for an empty list regardless of $offset.
     *
     * @param int   $offset Zero-based position to insert before.
     * @param mixed $value  Value to store.
     */
    public function insertBefore($offset, $value)
    {
        $result = $this->findIndex($offset);
        $anchor = $result['value'];
        $node = new HTMLPurifier_ArrayNode($value);

        if (!$anchor instanceof HTMLPurifier_ArrayNode) {
            // Empty list: the new node becomes the only element.
            $this->head = $node;
            $this->offset = 0;
            $this->offsetItem = $node;
        } elseif ($result['correct']) {
            // Splice the new node in front of the node found at $offset.
            $node->prev = $anchor->prev;
            $node->next = $anchor;
            if ($anchor->prev instanceof HTMLPurifier_ArrayNode) {
                $anchor->prev->next = $node;
            } else {
                $this->head = $node;
            }
            $anchor->prev = $node;
            // findIndex() left the cursor at $offset; the new node now
            // occupies that position.
            $this->offsetItem = $node;
        } else {
            // Position not found: $anchor is the last node, append after it.
            // The cursor is unaffected because everything before it is
            // unchanged.
            $anchor->next = $node;
            $node->prev = $anchor;
        }

        $this->count++;
    }

    /**
     * Removes the node at position $offset; later nodes shift down by one.
     * Does nothing when the position does not exist.
     *
     * @param int $offset Zero-based position to remove.
     */
    public function remove($offset)
    {
        $result = $this->findIndex($offset);
        if (!$result['correct']) {
            return;
        }

        $node = $result['value'];
        $before = $node->prev;
        $after = $node->next;

        // The head has no predecessor and the tail has no successor, so each
        // side is relinked only when a neighbour exists.
        if ($before instanceof HTMLPurifier_ArrayNode) {
            $before->next = $after;
        } else {
            $this->head = $after;
        }
        if ($after instanceof HTMLPurifier_ArrayNode) {
            $after->prev = $before;
        }

        // Detach the removed node so a retained handle cannot walk the list.
        $node->prev = null;
        $node->next = null;

        $this->count--;
        // findIndex() left the cursor on the removed node; its successor now
        // sits at the same position (null when the tail was removed, in
        // which case the next lookup restarts from the head).
        $this->offsetItem = $after;
    }

    /**
     * Returns the stored values as a plain zero-indexed array.
     *
     * @return array
     */
    public function getArray()
    {
        $values = array();
        for ($node = $this->head; $node instanceof HTMLPurifier_ArrayNode; $node = $node->next) {
            $values[] = $node->value;
        }
        return $values;
    }

    /**
     * @param int $offset
     * @return bool True when $offset is a valid position in the list.
     */
    #[\ReturnTypeWillChange]
    public function offsetExists($offset)
    {
        return $offset >= 0 && $offset < $this->count;
    }

    /**
     * @param int $offset
     * @return mixed The value at $offset, or null when it does not exist.
     */
    #[\ReturnTypeWillChange]
    public function offsetGet($offset)
    {
        $result = $this->findIndex($offset);
        if ($result['correct']) {
            return $result['value']->value;
        }
        return null;
    }

    /**
     * Replaces the value at an existing position. A null offset, which is
     * what PHP passes for "$list[] = $value", appends. Any other position
     * that does not exist is ignored.
     *
     * @param int|null $offset
     * @param mixed    $value
     */
    #[\ReturnTypeWillChange]
    public function offsetSet($offset, $value)
    {
        if ($offset === null) {
            // $this->count is one past the last position, so this appends.
            $this->insertBefore($this->count, $value);
            return;
        }

        $result = $this->findIndex($offset);
        if ($result['correct']) {
            $result['value']->value = $value;
        }
    }

    /**
     * Removes the element at $offset; see remove().
     *
     * @param int $offset
     */
    #[\ReturnTypeWillChange]
    public function offsetUnset($offset)
    {
        $this->remove($offset);
    }
}

View File

@@ -0,0 +1,24 @@
<?php
/**
 * One element of a doubly linked list: a stored value plus links to the
 * neighbouring nodes.
 */
class HTMLPurifier_ArrayNode
{
    /**
     * Preceding node; null until a link is assigned.
     * @var HTMLPurifier_ArrayNode
     */
    public $prev = null;

    /**
     * Following node; null until a link is assigned.
     * @var HTMLPurifier_ArrayNode
     */
    public $next = null;

    /**
     * Payload carried by this node.
     * @var mixed
     */
    public $value = null;

    /**
     * Binds the node's value to the caller's variable by reference, so a
     * later write to either side is visible through the other.
     *
     * @param mixed $value Variable holding the value to store.
     */
    public function __construct(&$value)
    {
        $this->value =& $value;
    }
}

View File

@@ -45,7 +45,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
protected $context;
public function execute($tokens, $config, $context) {
$tokens = new HTMLPurifier_Array($tokens);
$definition = $config->getHTMLDefinition();
// local variables
@@ -453,7 +453,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
$context->destroy('CurrentToken');
unset($this->injectors, $this->stack, $this->tokens, $this->t);
return $tokens;
return $tokens->getArray();
}
/**
@@ -490,6 +490,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
// array(number nodes to delete, new node 1, new node 2, ...)
$delete = array_shift($token);
throw new Exception("unsupported");
$old = array_splice($this->tokens, $this->t, $delete, $token);
if ($injector > -1) {
@@ -508,7 +509,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
* this token. You must reprocess after this.
*/
private function insertBefore($token) {
array_splice($this->tokens, $this->t, 0, array($token));
$this->tokens->insertBefore($this->t, $token);
}
/**
@@ -516,7 +517,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
* occupied space. You must reprocess after this.
*/
private function remove() {
array_splice($this->tokens, $this->t, 1);
$this->tokens->remove($this->t);
}
/**

View File

@@ -30,7 +30,7 @@ class HTMLPurifier_URIParser
// Note that ["<>] are an addition to the RFC's recommended
// characters, because they represent external delimiters.
$r_URI = '!'.
'(([^:/?#"<>]+):)?'. // 2. Scheme
'(([a-zA-Z0-9\.\+\-]+):)?'. // 2. Scheme
'(//([^/?#"<>]*))?'. // 4. Authority
'([^?#"<>]*)'. // 5. Path
'(\?([^#"<>]*))?'. // 7. Query

View File

@@ -0,0 +1,207 @@
<?php
/**
 * Unit tests for HTMLPurifier_Array, exercised through HTMLPurifier_ArrayMock
 * so that the protected cursor state can be inspected.
 */
class HTMLPurifier_ArrayTest extends UnitTestCase
{
    /**
     * Fixture shared by several tests. The tests feed the returned
     * two-element array (whose elements are themselves arrays) directly
     * into the list under test.
     * @return array
     */
    public function getData()
    {
        return array(
            array(array()),
            array(array(1, 2, 3, 4))
        );
    }

    /**
     * A freshly built list starts with its cursor on the head and reports
     * the size and contents of the array it was built from.
     */
    public function testConstruct()
    {
        $source = $this->getData();
        $list = new HTMLPurifier_ArrayMock($source);

        $this->assertEqual(0, $list->getOffset());
        $this->assertEqual($list->getHead(), $list->getOffsetItem());
        $this->assertEqual(count($source), $list->getCount());
        $this->assertEqual($source, $list->getArray());
    }

    /**
     * The cursor (offset and offsetItem) follows reads, removals and
     * insertions.
     */
    public function testFindIndex()
    {
        $source = array(1, 2, 3, 4, 5);
        $list = new HTMLPurifier_ArrayMock($source);

        // Reading each position in turn moves the cursor onto it.
        $position = 0;
        while ($position < $list->getCount()) {
            $list->offsetGet($position);
            $this->assertEqual($position, $list->getOffset());
            $this->assertEqual($source[$position], $list->getOffsetItem()->value);
            $position++;
        }

        $list->offsetGet(2);
        $this->assertEqual(2, $list->getOffset());
        $this->assertEqual(3, $list->getOffsetItem()->value);

        // After a removal the cursor stays put and holds the successor.
        $list->remove(2);
        $this->assertEqual(2, $list->getOffset());
        $this->assertEqual(4, $list->getOffsetItem()->value);

        $list->offsetGet(1);
        $this->assertEqual(1, $list->getOffset());
        $this->assertEqual(2, $list->getOffsetItem()->value);

        // After an insertion the cursor holds the inserted node.
        $list->insertBefore(1, 'a');
        $this->assertEqual(1, $list->getOffset());
        $this->assertEqual('a', $list->getOffsetItem()->value);
    }

    /**
     * insertBefore() must match array_splice() with a zero length at the
     * front, in the middle and past the end.
     */
    public function testInsertBefore()
    {
        $expected = $this->getData();
        $list = new HTMLPurifier_ArrayMock($expected);

        $this->spliceAndInsert($expected, $list, 0);
        $this->spliceAndInsert($expected, $list, 2);
        $this->spliceAndInsert($expected, $list, count($expected) * 2);
    }

    /**
     * remove() must match a one-element array_splice() at the front, at an
     * index past the end, and at an index far past the end.
     */
    public function testRemove()
    {
        $expected = $this->getData();
        $list = new HTMLPurifier_ArrayMock($expected);

        $this->spliceAndRemove($expected, $list, 0);
        $this->spliceAndRemove($expected, $list, 2);
        $this->spliceAndRemove($expected, $list, count($expected) * 2);
    }

    /**
     * getArray() gives back the array the list was built from.
     */
    public function testGetArray()
    {
        $source = $this->getData();
        $list = new HTMLPurifier_ArrayMock($source);

        $this->assertEqual($source, $list->getArray());
    }

    /**
     * ArrayAccess: isset() on the list agrees with isset() on the array.
     */
    public function testOffsetExists()
    {
        $source = $this->getData();
        $list = new HTMLPurifier_ArrayMock($source);

        $this->assertEqual(isset($source[0]), isset($list[0]));
    }

    /**
     * ArrayAccess: reading by position yields the original values.
     */
    public function testOffsetGet()
    {
        $source = array(1, 2, 3);
        $list = new HTMLPurifier_ArrayMock($source);

        foreach ($source as $position => $value) {
            $this->assertEqual($value, $list[$position]);
        }
    }

    /**
     * ArrayAccess: a value written to a position can be read back.
     */
    public function testOffsetSet()
    {
        $source = array(1, 2, 3);
        $list = new HTMLPurifier_ArrayMock($source);

        foreach ($source as $position => $value) {
            $doubled = $value * 2;
            $list[$position] = $doubled;
            $this->assertEqual($doubled, $list[$position]);
        }
    }

    /**
     * ArrayAccess: unset() removes the element. Unlike a native array the
     * remaining elements are renumbered, so positions always start at 0.
     */
    public function testOffsetUnset()
    {
        $list = new HTMLPurifier_ArrayMock(array(1, 2, 3, 4));

        // Each step: position to unset, then the expected remaining values.
        $steps = array(
            array(1, array(1, 3, 4)),
            array(0, array(3, 4)),
            array(1, array(3)),
            array(0, array()),
        );
        foreach ($steps as $step) {
            list($position, $remaining) = $step;
            unset($list[$position]);
            $this->assertEqual($remaining, $list->getArray());
        }
    }

    /**
     * Inserts 'a' at $index into both the reference array and the list,
     * then checks that they still agree.
     */
    private function spliceAndInsert(array &$expected, $list, $index)
    {
        array_splice($expected, $index, 0, array('a'));
        $list->insertBefore($index, 'a');
        $this->assertEqual($expected, $list->getArray());
    }

    /**
     * Removes the element at $index from both the reference array and the
     * list, then checks that they still agree.
     */
    private function spliceAndRemove(array &$expected, $list, $index)
    {
        array_splice($expected, $index, 1);
        $list->remove($index);
        $this->assertEqual($expected, $list->getArray());
    }
}
/**
 * Test subclass of HTMLPurifier_Array that exposes the list's head and its
 * protected count/cursor state through read-only accessors.
 */
class HTMLPurifier_ArrayMock extends HTMLPurifier_Array
{
    /**
     * First node of the list.
     * @return HTMLPurifier_ArrayNode|null
     */
    public function getHead()
    {
        return $this->head;
    }

    /**
     * Position of the cached cursor.
     * @return int
     */
    public function getOffset()
    {
        return $this->offset;
    }

    /**
     * Number of elements the list reports holding.
     * @return int
     */
    public function getCount()
    {
        return $this->count;
    }

    /**
     * Node the cached cursor currently points at.
     * @return HTMLPurifier_ArrayNode|null
     */
    public function getOffsetItem()
    {
        return $this->offsetItem;
    }
}

View File

@@ -140,6 +140,13 @@ class HTMLPurifier_URIParserTest extends HTMLPurifier_Harness
);
}
/**
 * Input whose colons do not follow a scheme-like prefix: every expected
 * component is null except the sixth assertParsing() argument, which
 * equals the raw input (presumably the path; confirm against
 * assertParsing()).
 */
function testEmbeddedColon() {
    $input = '{:test:}';
    $this->assertParsing(
        $input,
        null, null, null, null, $input, null, null
    );
}
}
// vim: et sw=4 sts=4