From 9a9036c68902c968a25231c3e44276eecee684d3 Mon Sep 17 00:00:00 2001
From: Paul Stone
Date: Thu, 27 Aug 2009 20:42:09 -0400
Subject: [PATCH] Implement auto-formatter that removes empty span tags.
Signed-off-by: Paul Stone
Signed-off-by: Edward Z. Yang
---
library/HTMLPurifier.includes.php | 1 +
library/HTMLPurifier.safe-includes.php | 1 +
library/HTMLPurifier/ConfigSchema/schema.ser | Bin 12999 -> 13152 bytes
...utoFormat.RemoveSpansWithoutAttributes.txt | 11 ++
.../Injector/RemoveSpansWithoutAttributes.php | 60 +++++++++++
.../RemoveSpansWithoutAttributesTest.php | 99 ++++++++++++++++++
6 files changed, 172 insertions(+)
create mode 100755 library/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveSpansWithoutAttributes.txt
create mode 100755 library/HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php
create mode 100755 tests/HTMLPurifier/Injector/RemoveSpansWithoutAttributesTest.php
diff --git a/library/HTMLPurifier.includes.php b/library/HTMLPurifier.includes.php
index 7cfb9706..df63d41b 100644
--- a/library/HTMLPurifier.includes.php
+++ b/library/HTMLPurifier.includes.php
@@ -176,6 +176,7 @@ require 'HTMLPurifier/Injector/DisplayLinkURI.php';
require 'HTMLPurifier/Injector/Linkify.php';
require 'HTMLPurifier/Injector/PurifierLinkify.php';
require 'HTMLPurifier/Injector/RemoveEmpty.php';
+require 'HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php';
require 'HTMLPurifier/Injector/SafeObject.php';
require 'HTMLPurifier/Lexer/DOMLex.php';
require 'HTMLPurifier/Lexer/DirectLex.php';
diff --git a/library/HTMLPurifier.safe-includes.php b/library/HTMLPurifier.safe-includes.php
index cf2c1d61..d540ab1a 100644
--- a/library/HTMLPurifier.safe-includes.php
+++ b/library/HTMLPurifier.safe-includes.php
@@ -170,6 +170,7 @@ require_once $__dir . '/HTMLPurifier/Injector/DisplayLinkURI.php';
require_once $__dir . '/HTMLPurifier/Injector/Linkify.php';
require_once $__dir . '/HTMLPurifier/Injector/PurifierLinkify.php';
require_once $__dir . '/HTMLPurifier/Injector/RemoveEmpty.php';
+require_once $__dir . '/HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php';
require_once $__dir . '/HTMLPurifier/Injector/SafeObject.php';
require_once $__dir . '/HTMLPurifier/Lexer/DOMLex.php';
require_once $__dir . '/HTMLPurifier/Lexer/DirectLex.php';
diff --git a/library/HTMLPurifier/ConfigSchema/schema.ser b/library/HTMLPurifier/ConfigSchema/schema.ser
index bbf12f9c3e7392aa8143727d2485f6f9ad1f97e1..082680cbf81edefb86ca08c588041f295809f28c 100644
GIT binary patch
delta 190
zcmX?}`XFtB8KdRq)BF|8+QybvN{*!^`EL0|xrrruL8-a
+ This directive causes <code>span</code> tags without any attributes
+ to be removed. It will also remove spans that had all attributes
+ removed during processing.
+
+--# vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php b/library/HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php
new file mode 100755
index 00000000..88cc02b9
--- /dev/null
+++ b/library/HTMLPurifier/Injector/RemoveSpansWithoutAttributes.php
@@ -0,0 +1,60 @@
+attrValidator = new HTMLPurifier_AttrValidator();
+ $this->config = $config;
+ $this->context = $context;
+ return parent::prepare($config, $context);
+ }
+
+ /**
+  * Drops a span start tag that has no attributes left after validation and
+  * flags its matching end tag so that handleEnd() removes it as well.
+  *
+  * Tokens that are not span start tags, and spans that keep at least one
+  * attribute, are left untouched.
+  *
+  * @param HTMLPurifier_Token $token Token being processed, taken by
+  *        reference; it is set to false to delete it from the stream.
+  */
+ public function handleElement(&$token) {
+     if ($token->name !== 'span' || !$token instanceof HTMLPurifier_Token_Start) {
+         return;
+     }
+
+     // We need to validate the attributes now since this doesn't normally
+     // happen until after MakeWellFormed. If all the attributes are removed
+     // the span needs to be removed too.
+     $this->attrValidator->validateToken($token, $this->config, $this->context);
+     // Armor the token so the attributes are not validated a second time.
+     $token->armor['ValidateAttributes'] = true;
+
+     if (!empty($token->attr)) {
+         return;
+     }
+
+     // Walk forward to the end tag that closes this span. The arguments are
+     // passed as plain variables: call-time pass-by-reference (f(&$x)) is a
+     // fatal error since PHP 5.4.
+     // NOTE(review): this relies on forwardUntilEndToken() declaring its
+     // parameters by reference in the parent class (not visible here) -- confirm.
+     $i = null;
+     $current = null;
+     $nesting = 0;
+     while ($this->forwardUntilEndToken($i, $current, $nesting)) {}
+
+     if ($current instanceof HTMLPurifier_Token_End && $current->name === 'span') {
+         // Mark closing span tag for deletion
+         $current->markForDeletion = true;
+         // Delete open span tag
+         $token = false;
+     }
+ }
+
+ /**
+  * Deletes an end tag that handleElement() flagged through the
+  * markForDeletion property.
+  *
+  * @param HTMLPurifier_Token $token End token being processed, taken by
+  *        reference; it is set to false to delete it from the stream.
+  */
+ public function handleEnd(&$token) {
+     // Only end tags flagged by handleElement() carry markForDeletion, so
+     // test with !empty() rather than reading the property directly; a
+     // direct read raises an undefined-property notice on every other
+     // end tag.
+     if (!empty($token->markForDeletion)) {
+         $token = false;
+     }
+ }
+}
+
+// vim: et sw=4 sts=4
diff --git a/tests/HTMLPurifier/Injector/RemoveSpansWithoutAttributesTest.php b/tests/HTMLPurifier/Injector/RemoveSpansWithoutAttributesTest.php
new file mode 100755
index 00000000..704c1cd7
--- /dev/null
+++ b/tests/HTMLPurifier/Injector/RemoveSpansWithoutAttributesTest.php
@@ -0,0 +1,99 @@
+config->set('HTML.Allowed', 'span[class],div,p,strong,em');
+ $this->config->set('AutoFormat.RemoveSpansWithoutAttributes', true);
+ }
+
+ function testSingleSpan() { // Single-span case (per the name). NOTE(review): both literals read the same here; the tags look stripped from this copy -- confirm against upstream.
+ $this->assertResult(
+ 'foo',
+ 'foo'
+ );
+ }
+
+ function testSingleSpanWithAttributes() { // Span-with-attributes case (per the name). NOTE(review): markup in both literals looks stripped from this copy -- confirm against upstream.
+ $this->assertResult(
+ 'foo',
+ 'foo'
+ );
+ }
+
+ function testSingleNestedSpan() { // Nested single-span case (per the name). NOTE(review): the literals are broken across lines and look stripped of markup in this copy -- confirm against upstream.
+ $this->assertResult(
+ 'foo
',
+ 'foo
'
+ );
+ }
+
+ function testSingleNestedSpanWithAttributes() { // Nested span-with-attributes case (per the name). NOTE(review): markup in the literals looks stripped from this copy -- confirm against upstream.
+ $this->assertResult(
+ 'foo
',
+ 'foo
'
+ );
+ }
+
+
+ function testSpanWithChildren() { // Span containing child content (per the name). NOTE(review): markup in both literals looks stripped from this copy -- confirm against upstream.
+ $this->assertResult(
+ 'foo bar baz',
+ 'foo bar baz'
+ );
+ }
+
+ function testSpanWithSiblings() { // Span with content before and after it (per the name and the fixture text). NOTE(review): markup looks stripped from this copy -- confirm against upstream.
+ $this->assertResult(
+ 'before inside after
',
+ 'before inside after
'
+ );
+ }
+
+ function testNestedSpanWithSiblingsAndChildren() { // Nested span with both siblings and children (per the name). NOTE(review): markup looks stripped from this copy -- confirm against upstream.
+ $this->assertResult(
+ 'a b c d e
',
+ 'a b c d e
'
+ );
+ }
+
+ function testNestedSpansWithoutAttributes() { // Several nested attribute-less spans (per the name). NOTE(review): markup in both literals looks stripped from this copy -- confirm against upstream.
+ $this->assertResult(
+ 'onetwothree',
+ 'onetwothree'
+ );
+ }
+
+ function testDeeplyNestedSpan() { // Deep-nesting case (per the name). NOTE(review): both literals are empty in this copy, presumably because they held only markup -- confirm against upstream.
+ $this->assertResult(
+ '',
+ ''
+ );
+ }
+
+ function testSpanWithInvalidAttributes() { // Span whose attributes are all invalid (per the name). NOTE(review): markup looks stripped from this copy -- confirm against upstream.
+ $this->assertResult(
+ 'foo
',
+ 'foo
'
+ );
+ }
+
+ function testNestedAlternateSpans() { // Alternating nested spans (per the name). NOTE(review): markup looks stripped from this copy -- confirm against upstream.
+ $this->assertResult(
+'a b c d e f
+',
+'a b c d e f
+'
+ );
+ }
+
+ function testSpanWithSomeInvalidAttributes() { // Span with a mix of valid and invalid attributes (per the name). NOTE(review): markup looks stripped from this copy -- confirm against upstream.
+ $this->assertResult(
+ 'foo
',
+ 'foo
'
+ );
+ }
+}
+
+// vim: et sw=4 sts=4