更加安全而且简洁的strip_tags

This commit is contained in:
joyqi 2013-11-25 17:40:34 +08:00
parent 4ac9bb0022
commit 741bfab5b1

View File

@ -113,55 +113,6 @@ class Typecho_Common
return 0 === strpos($path, $safePath);
}
/**
 * Filter one opening HTML tag against the configured whitelist.
 *
 * The tag name is lower-cased and looked up in self::$_allowableTags
 * (a "|"-delimited string). A tag that is not listed is dropped entirely.
 * Attributes are parsed with self::__parseAtttrs() and only those whose
 * lower-cased name is listed in self::$_allowableAttributes for this tag
 * are written back.
 *
 * @access public
 * @param string $tag tag name
 * @param string $attrs raw attribute string found after the tag name
 * @return string the rebuilt opening tag, or '' when the tag is not allowed
 */
public static function __tagFilter($tag, $attrs)
{
    $tag = strtolower($tag);
    if (false === strpos(self::$_allowableTags, "|{$tag}|")) {
        return '';
    }

    // Only whitelisted attributes are re-emitted; the raw input string is
    // never echoed back (previously an attribute string of "0" was appended
    // verbatim because empty("0") is true, turning "<b 0>" into "<b0>").
    $filtered = '';

    if (!empty($attrs)) {
        // The tag may have no attribute list registered (for example when the
        // list was keyed with a different letter case); treat that as
        // "no attributes allowed" instead of passing null to in_array().
        $allowed = array();
        if (isset(self::$_allowableAttributes[$tag]) && is_array(self::$_allowableAttributes[$tag])) {
            $allowed = self::$_allowableAttributes[$tag];
        }

        foreach (self::__parseAtttrs($attrs) as $name => $val) {
            if (!in_array(strtolower($name), $allowed)) {
                continue;
            }

            // empty() treats the string "0" as empty, which used to turn
            // border=0 into a bare "border" attribute.
            $hasValue = !empty($val) || '0' === $val;
            $filtered .= ' ' . $name . ($hasValue ? '=' . $val : '');
        }
    }

    return "<{$tag}{$filtered}>";
}
/**
 * Filter one closing tag (callback for a regular-expression replace).
 *
 * The captured tag name is lower-cased and looked up in the "|"-delimited
 * self::$_allowableTags string. An allowed tag is re-emitted in normalized
 * lower-case form; any other tag is removed.
 *
 * @access public
 * @param array $matches regex matches; index 1 holds the tag name
 * @return string the normalized closing tag, or '' when the tag is not allowed
 */
public static function __closeTagFilter($matches)
{
    $name = strtolower($matches[1]);

    if (false === strpos(self::$_allowableTags, "|{$name}|")) {
        return '';
    }

    return "</{$name}>";
}
/**
* 解析属性
*
@ -169,7 +120,7 @@ class Typecho_Common
* @param string $attrs 属性字符串
* @return array
*/
public static function __parseAtttrs($attrs)
public static function __parseAttrs($attrs)
{
$attrs = trim($attrs);
$len = strlen($attrs);
@ -223,30 +174,6 @@ class Typecho_Common
return $result;
}
/**
 * Trim an attribute value and strip one pair of matching surrounding quotes.
 *
 * When the trimmed value is at least two characters long and both starts and
 * ends with the same quote character (" or '), that character is reported
 * through $quote and the trimmed inner text is returned. Otherwise $quote is
 * set to '' and the trimmed value is returned unchanged.
 *
 * @access public
 * @param string $attr attribute value
 * @param string $quote receives the quote character that was removed, or ''
 * @return string
 */
public static function __attrTrim($attr, &$quote)
{
    $value = trim($attr);
    $length = strlen($value);
    $quote = '';

    // Too short to carry both an opening and a closing quote.
    if ($length < 2) {
        return $value;
    }

    $first = $value[0];
    $startsWithQuote = ('"' == $first || "'" == $first);

    if (!$startsWithQuote || $first != $value[$length - 1]) {
        return $value;
    }

    $quote = $first;

    return trim(substr($value, 1, -1));
}
/**
* 程序初始化方法
*
@ -633,73 +560,39 @@ EOF;
*/
public static function stripTags($html, $allowableTags = NULL)
{
if (!empty($allowableTags) && preg_match_all("/\<([a-z]+)([^>]*)\>/is", $allowableTags, $tags)) {
self::$_allowableTags = '|' . implode('|',
array_unique(array_map('trim', array_map('strtolower', $tags[1])))) . '|';
$normalizeTags = '';
$allowableAttributes = array();
if (in_array('code', $tags[1])) {
$html = self::lockHTML($html);
}
$normalizeTags = '<' . implode('><', $tags[1]) . '>';
$html = strip_tags($html, $normalizeTags);
if (!empty($allowableTags) && preg_match_all("/\<([_a-z0-9-]+)([^>]*)\>/is", $allowableTags, $tags)) {
$normalizeTags = '<' . implode('><', array_map('strtolower', $tags[1])) . '>';
$attributes = array_map('trim', $tags[2]);
$allowableAttributes = array();
foreach ($attributes as $key => $val) {
$allowableAttributes[$tags[1][$key]] = array_keys(self::__parseAtttrs($val));
$allowableAttributes[strtolower($tags[1][$key])] =
array_map('strtolower', array_keys(self::__parseAttrs($val)));
}
self::$_allowableAttributes = $allowableAttributes;
}
$len = strlen($html);
$tag = '';
$attrs = '';
$pos = -1;
$quote = '';
$start = 0;
for ($i = 0; $i < $len; $i ++) {
if ('<' == $html[$i] && -1 == $pos) {
$start = $i;
$pos = 0;
} else if (0 == $pos && '/' == $html[$i] && empty($tag)) {
$pos = -1;
} else if (0 == $pos && ctype_alpha($html[$i])) {
$tag .= $html[$i];
} else if (0 == $pos && ctype_space($html[$i])) {
$pos = 1;
} else if (1 == $pos && (!empty($quote) || '>' != $html[$i])) {
if (empty($quote) && ('"' == $html[$i] || "'" == $html[$i])) {
$quote = $html[$i];
} else if (!empty($quote) && $quote == $html[$i]) {
$quote = '';
}
$attrs .= $html[$i];
} else if (-1 != $pos && empty($quote) && '>' == $html[$i]) {
$out = self::__tagFilter($tag, $attrs);
$outLen = strlen($out);
$nextStart = $start + $outLen;
$tag = '';
$attrs = '';
$html = substr_replace($html, $out, $start, $i - $start + 1);
$len = strlen($html);
$i = $nextStart - 1;
$pos = -1;
$html = strip_tags($html, $normalizeTags);
$dom = new DOMDocument('1.0', self::$charset);
$dom->xmlStandalone = false;
@$dom->loadHTML('<?xml encoding="UTF-8">' . $html);
foreach($dom->getElementsByTagName('*') as $node){
$tagName = strtolower($node->tagName);
for ($i = 0; $i < $node->attributes->length; $i ++) {
$attribute = $node->attributes->item($i);
$name = strtolower($attribute->name);
if (!in_array($name, $allowableAttributes[$tagName])) {
$node->removeAttributeNode($attribute);
}
}
$html = preg_replace_callback("/<\/([_0-9a-z-]+)>/is", array('Typecho_Common', '__closeTagFilter'), $html);
$html = self::releaseHTML($html);
} else {
$html = strip_tags($html);
}
//去掉注释
return preg_replace("/<\!\-\-[^>]*\-\->/s", '', $html);
$body = $dom->getElementsByTagName('body');
return $body->length > 0 ? $body->item(0)->nodeValue : '';
}
/**