diff --git a/NEWS b/NEWS index 1a57d65c..b9c97098 100644 --- a/NEWS +++ b/NEWS @@ -26,6 +26,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier ! New directive %Attr.DisableURI, which eliminates all hyperlinking ! New directive %URI.Munge, munges URI so you can use some sort of redirector service to avoid PageRank leaks or warn users that they are exiting your site. +! Added spiffy new smoketest printDefinition.php, which lets you twiddle with + the configuration settings and see how the internal rules are affected. - Added missing type to ChildDef_Chameleon - Remove Tidy option from demo if there is not Tidy available . ChildDef_Required guards against empty tags @@ -33,6 +35,8 @@ NEWS ( CHANGELOG and HISTORY ) HTMLPurifier . Added peace-of-mind variable initialization to Strategy_FixNesting . Added HTMLPurifier->info_parent_def, parent child processing made special . Added internal documents briefly summarizing future progression of HTML +. HTMLPurifier_Config->getBatch($namespace) added +. More lenient casting to bool from string in HTMLPurifier_ConfigSchema 1.2.1, unknown release date (bugfix/minor feature release, may be dropped if 1.2.0 is stable) diff --git a/docs/examples/demo.php b/docs/examples/demo.php index 3386679e..e8749268 100644 --- a/docs/examples/demo.php +++ b/docs/examples/demo.php @@ -26,11 +26,11 @@ if (empty($_REQUEST['strict'])) { ?>
-Welcome to the live demo. Enter some HTML and see how HTMLPurifier +
Welcome to the live demo. Enter some HTML and see how HTML Purifier will filter it.
-Return to HTMLPurifier's home page. +
Return to HTML Purifier's home page. Try the form in GET and POST request flavors (GET is easy to validate with W3C, but POST allows larger inputs).
diff --git a/library/HTMLPurifier/Config.php b/library/HTMLPurifier/Config.php index aa63ea41..c6a5eba1 100644 --- a/library/HTMLPurifier/Config.php +++ b/library/HTMLPurifier/Config.php @@ -68,6 +68,19 @@ class HTMLPurifier_Config return $this->conf[$namespace][$key]; } + /** + * Retrieves an array of directives to values from a given namespace + * @param $namespace String namespace + */ + function getBatch($namespace) { + if (!isset($this->def->info[$namespace])) { + trigger_error('Cannot retrieve undefined namespace', + E_USER_WARNING); + return; + } + return $this->conf[$namespace]; + } + /** * Sets a value to configuration. * @param $namespace String namespace @@ -134,6 +147,7 @@ class HTMLPurifier_Config */ function loadArray($config_array) { foreach ($config_array as $key => $value) { + $key = str_replace('_', '.', $key); if (strpos($key, '.') !== false) { // condensed form list($namespace, $directive) = explode('.', $key); diff --git a/library/HTMLPurifier/ConfigSchema.php b/library/HTMLPurifier/ConfigSchema.php index 79633b20..ac78e80d 100644 --- a/library/HTMLPurifier/ConfigSchema.php +++ b/library/HTMLPurifier/ConfigSchema.php @@ -247,11 +247,20 @@ class HTMLPurifier_ConfigSchema { case 'bool': if (is_int($var) && ($var === 0 || $var === 1)) { $var = (bool) $var; + } elseif (is_string($var)) { + if ($var == 'on' || $var == 'true' || $var == '1') { + $var = true; + } elseif ($var == 'off' || $var == 'false' || $var == '0') { + $var = false; + } else { + break; + } } elseif (!is_bool($var)) break; return $var; case 'list': case 'hash': case 'lookup': + if (is_string($var)) $var = explode(',',$var); if (!is_array($var)) break; $keys = array_keys($var); if ($keys === array_keys($keys)) { diff --git a/library/HTMLPurifier/Printer.php b/library/HTMLPurifier/Printer.php new file mode 100644 index 00000000..4f421d8a --- /dev/null +++ b/library/HTMLPurifier/Printer.php @@ -0,0 +1,125 @@ +generator = new HTMLPurifier_Generator(); + } + + /** + * Main function that 
renders object or aspect of that object + * @param $config Configuration object + */ + function render($config) {} + + /** + * Returns a start tag + * @param $tag Tag name + * @param $attr Attribute array + */ + function start($tag, $attr = array()) { + return $this->generator->generateFromToken( + new HTMLPurifier_Token_Start($tag, $attr ? $attr : array()) + ); + } + + /** + * Returns an end tag + * @param $tag Tag name + */ + function end($tag) { + return $this->generator->generateFromToken( + new HTMLPurifier_Token_End($tag) + ); + } + + /** + * Prints a complete element with content inside + * @param $tag Tag name + * @param $contents Element contents + * @param $attr Tag attributes + * @param $escape Bool whether or not to escape contents + */ + function element($tag, $contents, $attr = array(), $escape = true) { + return $this->start($tag, $attr) . + ($escape ? $this->escape($contents) : $contents) . + $this->end($tag); + } + + /** + * Prints a simple key/value row in a table. + * @param $name Key + * @param $value Value + */ + function row($name, $value) { + if (is_bool($value)) $value = $value ? 'On' : 'Off'; + return + $this->start('tr') . "\n" . + $this->element('th', $name) . "\n" . + $this->element('td', $value) . "\n" . + $this->end('tr') + ; + } + + /** + * Escapes a string for HTML output. 
+ * @param $string String to escape + */ + function escape($string) { + $string = HTMLPurifier_Encoder::cleanUTF8($string); + $string = htmlspecialchars($string, ENT_COMPAT, 'UTF-8'); + return $string; + } + + /** + * Takes a list of strings and joins them into a single comma-separated string + * @param $array List of strings + * @param $polite Bool whether or not to add "and" before the last item + */ + function listify($array, $polite = false) { + if (empty($array)) return 'None'; + $ret = ''; + $i = count($array); + foreach ($array as $value) { + $i--; + $ret .= $value; + if ($i > 0 && !($polite && $i == 1)) $ret .= ', '; + if ($polite && $i == 1) $ret .= ' and '; + } + return $ret; + } + + /** + * Retrieves the class of an object without prefixes + * @param $obj Object to determine class of + * @param $prefix Further prefix to remove + */ + function getClass($obj, $prefix = '') { + static $five = null; + if ($five === null) $five = version_compare(PHP_VERSION, '5', '>='); + $prefix = 'HTMLPurifier_' . $prefix; + if (!$five) $prefix = strtolower($prefix); + return str_replace($prefix, '', get_class($obj)); + } + +} + +?> \ No newline at end of file diff --git a/library/HTMLPurifier/Printer/CSSDefinition.php b/library/HTMLPurifier/Printer/CSSDefinition.php new file mode 100644 index 00000000..75dff10a --- /dev/null +++ b/library/HTMLPurifier/Printer/CSSDefinition.php @@ -0,0 +1,10 @@ +To be implemented.';} + +} + +?> \ No newline at end of file diff --git a/library/HTMLPurifier/Printer/HTMLDefinition.php b/library/HTMLPurifier/Printer/HTMLDefinition.php new file mode 100644 index 00000000..c002f1ad --- /dev/null +++ b/library/HTMLPurifier/Printer/HTMLDefinition.php @@ -0,0 +1,184 @@ +config =& $config; + $this->def =& $config->getHTMLDefinition(); + $def =& $this->def; + + $ret .= $this->start('div', array('class' => 'HTMLPurifier_Printer')); + $ret .= $this->start('table') . 
"\n"; + $ret .= $this->element('caption', 'Environment'); + + $ret .= $this->row('Parent of fragment', $def->info_parent) . "\n"; + $ret .= $this->row('Strict mode', $def->strict) . "\n"; + if ($def->strict) $ret .= $this->row('Block wrap name', $def->info_block_wrapper) . "\n"; + + $ret .= $this->start('tr'); + $ret .= $this->element('th', 'Global attributes'); + $ret .= $this->element('td', $this->listifyAttr($def->info_global_attr),0,0); + $ret .= $this->end('tr'); + + $ret .= $this->renderChildren($def->info_parent_def->child); + + $ret .= $this->start('tr'); + $ret .= $this->element('th', 'Tag transforms'); + $list = array(); + foreach ($def->info_tag_transform as $old => $new) { + $new = $this->getClass($new, 'TagTransform_'); + $list[] = "<$old> with $new"; + } + $ret .= $this->element('td', $this->listify($list)); + $ret .= $this->end('tr'); + + $ret .= $this->start('tr'); + $ret .= $this->element('th', 'Pre-AttrTransform'); + $ret .= $this->element('td', $this->listifyObjectList($def->info_attr_transform_pre)); + $ret .= $this->end('tr'); + + $ret .= $this->start('tr'); + $ret .= $this->element('th', 'Post-AttrTransform'); + $ret .= $this->element('td', $this->listifyObjectList($def->info_attr_transform_post)); + $ret .= $this->end('tr'); + + $ret .= $this->end('table') . "\n"; + + $ret .= $this->renderInfo() . "\n"; + + $ret .= $this->end('div'); + + return $ret; + } + + function renderInfo() { + $ret = ''; + $ret .= $this->start('table') . 
"\n"; + $ret .= $this->element('caption', 'Elements ($info)'); + ksort($this->def->info); + $ret .= $this->start('tr'); + $ret .= $this->element('th', 'Allowed tags', array('colspan' => 2, 'class' => 'heavy')); + $ret .= $this->end('tr'); + $ret .= $this->start('tr'); + $ret .= $this->element('td', $this->listifyTagLookup($this->def->info), array('colspan' => 2)); + $ret .= $this->end('tr'); + foreach ($this->def->info as $name => $def) { + $ret .= $this->start('tr'); + $ret .= $this->element('th', "<$name>", array('class'=>'heavy', 'colspan' => 2)); + $ret .= $this->end('tr'); + $ret .= $this->start('tr'); + $ret .= $this->element('th', 'Type'); + $ret .= $this->element('td', ucfirst($def->type)); + $ret .= $this->end('tr'); + if (!empty($def->excludes)) { + $ret .= $this->start('tr'); + $ret .= $this->element('th', 'Excludes'); + $ret .= $this->element('td', $this->listifyTagLookup($def->excludes)); + $ret .= $this->end('tr'); + } + if (!empty($def->attr_transform_pre)) { + $ret .= $this->start('tr'); + $ret .= $this->element('th', 'Pre-AttrTransform'); + $ret .= $this->element('td', $this->listifyObjectList($def->attr_transform_pre)); + $ret .= $this->end('tr'); + } + if (!empty($def->attr_transform_post)) { + $ret .= $this->start('tr'); + $ret .= $this->element('th', 'Post-AttrTransform'); + $ret .= $this->element('td', $this->listifyObjectList($def->attr_transform_post)); + $ret .= $this->end('tr'); + } + if (!empty($def->auto_close)) { + $ret .= $this->start('tr'); + $ret .= $this->element('th', 'Auto closed by'); + $ret .= $this->element('td', $this->listifyTagLookup($def->auto_close)); + $ret .= $this->end('tr'); + } + $ret .= $this->start('tr'); + $ret .= $this->element('th', 'Allowed attributes'); + $ret .= $this->element('td',$this->listifyAttr($def->attr),0,0); + $ret .= $this->end('tr'); + + $ret .= $this->renderChildren($def->child); + } + $ret .= $this->end('table'); + return $ret; + } + + function renderChildren($def) { + $context = new 
HTMLPurifier_Context(); + $ret = ''; + $ret .= $this->start('tr'); + $elements = array(); + $attr = array(); + if (isset($def->elements)) { + if ($def->type == 'strictblockquote') $def->validateChildren(array(), $this->config, $context); + $elements = $def->elements; + } elseif ($def->type == 'chameleon') { + $attr['rowspan'] = 2; + } elseif ($def->type == 'empty') { + $elements = array(); + } elseif ($def->type == 'table') { + $elements = array('col', 'caption', 'colgroup', 'thead', + 'tfoot', 'tbody', 'tr'); + } + $ret .= $this->element('th', 'Allowed children', $attr); + + if ($def->type == 'chameleon') { + + $ret .= $this->element('td', + 'Block: ' . + $this->escape($this->listifyTagLookup($def->block->elements)),0,0); + $ret .= $this->end('tr'); + $ret .= $this->start('tr'); + $ret .= $this->element('td', + 'Inline: ' . + $this->escape($this->listifyTagLookup($def->inline->elements)),0,0); + + } else { + $ret .= $this->element('td', + ''.ucfirst($def->type).': ' . + $this->escape($this->listifyTagLookup($elements)),0,0); + } + $ret .= $this->end('tr'); + return $ret; + } + + function listifyTagLookup($array) { + $list = array(); + foreach ($array as $name => $discard) { + if ($name !== '#PCDATA' && !isset($this->def->info[$name])) continue; + $list[] = $name; + } + return $this->listify($list); + } + + function listifyObjectList($array) { + $list = array(); + foreach ($array as $discard => $obj) { + $list[] = $this->getClass($obj, 'AttrTransform_'); + } + return $this->listify($list); + } + + function listifyAttr($array) { + $list = array(); + foreach ($array as $name => $obj) { + if ($obj === false) continue; + $list[] = "$name = " . $this->getClass($obj, 'AttrDef_') . 
''; + } + return $this->listify($list); + } + +} + +?> \ No newline at end of file diff --git a/smoketests/common.php b/smoketests/common.php index e01d7500..13cc6e59 100644 --- a/smoketests/common.php +++ b/smoketests/common.php @@ -2,8 +2,7 @@ header('Content-type: text/html; charset=UTF-8'); -set_include_path('../library' . PATH_SEPARATOR . get_include_path()); -require_once 'HTMLPurifier.php'; +require_once '../library/HTMLPurifier.auto.php'; function escapeHTML($string) { $string = HTMLPurifier_Encoder::cleanUTF8($string); diff --git a/smoketests/printDefinition.php b/smoketests/printDefinition.php new file mode 100644 index 00000000..f6d74274 --- /dev/null +++ b/smoketests/printDefinition.php @@ -0,0 +1,122 @@ + $value) { + if (!strncmp($key, 'Null_', 5) && !empty($value)) { + unset($get[substr($key, 5)]); + unset($get[$key]); + } +} + +@$config->loadArray($get); + +$printer_html_definition = new HTMLPurifier_Printer_HTMLDefinition(); +$printer_css_definition = new HTMLPurifier_Printer_CSSDefinition(); + +echo ''; +?> + + + +Pretty-print an object and see how it turns out.
+For more information, see Cheng Peng Su's original advisory. This particular exploit code appears only to work diff --git a/smoketests/xssAttacks.php b/smoketests/xssAttacks.php index 4fdace29..b0fec354 100644 --- a/smoketests/xssAttacks.php +++ b/smoketests/xssAttacks.php @@ -20,7 +20,7 @@ function formatCode($string) { "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
-XSS attacks are from http://ha.ckers.org/xss.html.
Caveats: