mirror of
https://github.com/e107inc/e107.git
synced 2025-03-14 01:19:44 +01:00
MAJOR cleanup of toDB(), toHTML() and toForm(). Requires extensive testing on different server environments.
Has the potential to break backward compatibility, so please back up your DBs first. May be tested using: echo $tp->test();
This commit is contained in:
parent
2f7334fd6d
commit
5fe04faba1
@ -493,7 +493,6 @@ class e_parse extends e_parser
|
||||
*/
|
||||
public function toDB($data, $nostrip = FALSE, $no_encode = FALSE, $mod = FALSE, $original_author = FALSE)
|
||||
{
|
||||
|
||||
$core_pref = e107::getConfig();
|
||||
if (is_array($data))
|
||||
{
|
||||
@ -514,26 +513,26 @@ class e_parse extends e_parser
|
||||
|
||||
if ($mod != 'pReFs') //XXX We're not saving prefs.
|
||||
{
|
||||
|
||||
$data = $this->preFilter($data); // used by bb_xxx.php toDB() functions. bb_code.php toDB() allows us to properly bypass HTML cleaning below.
|
||||
|
||||
if (strip_tags($data) != $data) // html tags present.
|
||||
{
|
||||
|
||||
$data = $this->cleanHtml($data); // sanitize all html.
|
||||
// return $data;
|
||||
$data = $this->cleanHtml($data); // sanitize all html.
|
||||
|
||||
$data = urldecode($data); // symptom of cleaning the HTML - urlencodes src attributes containing { and } .eg. {e_BASE}
|
||||
$data = urldecode($data); // symptom of cleaning the HTML - urlencodes src attributes containing { and } .eg. {e_BASE}
|
||||
}
|
||||
|
||||
if (!check_class($core_pref->get('post_html', e_UC_MAINADMIN)))
|
||||
{
|
||||
$data = strip_tags($data); // remove tags from cleaned html.
|
||||
$data = str_replace(array('[html]','[/html]'),'',$data);
|
||||
// $data = $this->dataFilter($data);
|
||||
}
|
||||
|
||||
$data = html_entity_decode($data, ENT_QUOTES, 'utf-8'); // Prevent double-entities. Fix for [code] - see bb_code.php toDB();
|
||||
// $data = html_entity_decode($data, ENT_QUOTES, 'utf-8'); // Prevent double-entities. Fix for [code] - see bb_code.php toDB();
|
||||
}
|
||||
|
||||
|
||||
if (check_class($core_pref->get('post_html'))) /*$core_pref->is('post_html') && */
|
||||
{
|
||||
$no_encode = TRUE;
|
||||
@ -741,17 +740,34 @@ class e_parse extends e_parser
|
||||
|
||||
function toForm($text)
|
||||
{
|
||||
|
||||
if(empty($text)) // fix - handle proper 0, Space etc values.
|
||||
{
|
||||
return $text;
|
||||
}
|
||||
|
||||
|
||||
if(substr($text,0,6) == '[html]')
|
||||
{
|
||||
// $text = $this->toHtml($text,true);
|
||||
$search = array('&quot;','&#039;','&amp;');
|
||||
$replace = array('"',"'",'&');
|
||||
$text = str_replace($search,$replace,$text);
|
||||
// return $text;
|
||||
//$text = htmlentities($text,ENT_NOQUOTES, "UTF-8");
|
||||
|
||||
return $text;
|
||||
|
||||
}
|
||||
// return htmlentities($text);
|
||||
|
||||
$search = array('&#036;', '&quot;', '<', '>');
|
||||
$replace = array('$', '"', '&lt;', '&gt;');
|
||||
$text = str_replace($search, $replace, $text);
|
||||
if (e107::wysiwyg() !== true)
|
||||
{
|
||||
// fix for utf-8 issue with html_entity_decode(); ???
|
||||
$text = str_replace("&nbsp;", " ", $text);
|
||||
// $text = str_replace("&nbsp;", " ", $text);
|
||||
}
|
||||
return $text;
|
||||
}
|
||||
@ -1347,7 +1363,7 @@ class e_parse extends e_parser
|
||||
{
|
||||
return $text;
|
||||
}
|
||||
|
||||
|
||||
$pref = e107::getPref();
|
||||
|
||||
global $fromadmin;
|
||||
@ -1519,10 +1535,11 @@ class e_parse extends e_parser
|
||||
|
||||
case 'html' : // This overrides and deprecates html.bb
|
||||
$proc_funcs = TRUE;
|
||||
|
||||
$noBreak = TRUE;
|
||||
// $code_text = str_replace("\r\n", " ", $code_text);
|
||||
$code_text = html_entity_decode($code_text, ENT_QUOTES, CHARSET);
|
||||
$code_text = str_replace('&','&amp;',$code_text); // validation safe.
|
||||
// $code_text = html_entity_decode($code_text, ENT_QUOTES, CHARSET);
|
||||
// $code_text = str_replace('&','&amp;',$code_text); // validation safe.
|
||||
$html_start = "<!-- bbcode-html-start -->"; // markers for html-to-bbcode replacement.
|
||||
$html_end = "<!-- bbcode-html-end -->";
|
||||
$full_text = str_replace(array("[html]","[/html]"), "",$code_text); // quick fix.. security issue?
|
||||
@ -1530,8 +1547,9 @@ class e_parse extends e_parser
|
||||
$full_text = $html_start.$full_text.$html_end;
|
||||
$full_text = $this->parseBBTags($full_text); // strip <bbcode> tags.
|
||||
$opts['nobreak'] = true;
|
||||
|
||||
break;
|
||||
|
||||
|
||||
case 'table' : // strip <br /> from inside of <table>
|
||||
|
||||
$convertNL = FALSE;
|
||||
@ -3231,8 +3249,69 @@ class e_parser
|
||||
// $tp = e107::getParser();
|
||||
$sql = e107::getDb();
|
||||
$tp = e107::getParser();
|
||||
|
||||
// -------------------- Encoding ----------------
|
||||
|
||||
echo "<h2>Encoding Test</h2>";
|
||||
|
||||
echo"<h3>User Input from Form</h3>";
|
||||
|
||||
$text = <<<TMPL
|
||||
[html]<p><strong>bold print</strong></p>
|
||||
<pre class="prettyprint linenums"><a href='#'>Something</a></pre>
|
||||
<p>Some text's and things.</p>
|
||||
<p> </p>
|
||||
<p><a href="/test.php?w=9&h=12">link</a></p>
|
||||
<p>日本語 简体中文</p>
|
||||
<p> </p>
|
||||
[/html]
|
||||
TMPL;
|
||||
|
||||
|
||||
|
||||
// $text .= '[code=inline]<b class="something">Something</b>[/code]日本語 ';
|
||||
|
||||
|
||||
print_a($text);
|
||||
|
||||
$dbText = $tp->toDB($text,true);
|
||||
|
||||
echo "<h3>Use Input ≫ toDb()</h3>";
|
||||
|
||||
print_a($dbText);
|
||||
|
||||
echo "<h3>From DB ≫ toHtml()</h3>";
|
||||
$html = $tp->toHtml($dbText,true);
|
||||
print_a($html);
|
||||
|
||||
echo "<h3>From DB ≫ toHtml() (rendered)</h3>";
|
||||
echo $html;
|
||||
|
||||
echo "<h3>FromDB ≫ toForm()</h3>";
|
||||
$toForm = $tp->toForm($dbText);
|
||||
echo e107::getForm()->open('test');
|
||||
echo "<textarea cols='100' style='width:100%;height:300px' >".$toForm."</textarea>";
|
||||
echo e107::getForm()->close();
|
||||
|
||||
|
||||
similar_text($text, html_entity_decode( $toForm, ENT_COMPAT, 'UTF-8'),$perc);
|
||||
echo "<h3>Original to compare with above (".number_format($perc)."%) - Should be over 95%</h3>";
|
||||
|
||||
|
||||
|
||||
|
||||
print_a($text);
|
||||
|
||||
return;
|
||||
|
||||
//return;
|
||||
// ---------------------------------
|
||||
|
||||
|
||||
$html = $text;
|
||||
|
||||
|
||||
$html = $this->getXss();
|
||||
// $html = $this->getXss();
|
||||
|
||||
echo "<h2>Unprocessed XSS</h2>";
|
||||
// echo $html; // Remove Comment for a real mess!
|
||||
@ -3251,14 +3330,17 @@ class e_parser
|
||||
$sql->db_Mark_Time('tp->toHtml');
|
||||
|
||||
echo "<h3>\$tp->toDB()</h3>";
|
||||
// echo $tp->dataFilter($html); // Remove Comment for a real mess!
|
||||
print_a($tp->toDB($html));
|
||||
$sql->db_Mark_Time('tp->toDB');
|
||||
|
||||
// echo $tp->dataFilter($html); // Remove Comment for a real mess!
|
||||
$todb = $tp->toDB($html);
|
||||
print_a( $todb);
|
||||
$sql->db_Mark_Time('tp->toDB');
|
||||
|
||||
echo "<h3>\$tp->toForm() with toDB input.</h3>";
|
||||
print_a( $tp->toForm($todb));
|
||||
|
||||
echo "<h2>New Parser</h2>";
|
||||
echo "<h3>Processed</h3>";
|
||||
$cleaned = $this->cleanHtml($html);
|
||||
$cleaned = $this->cleanHtml($html, true); // false = don't check html pref.
|
||||
print_a($cleaned);
|
||||
$sql->db_Mark_Time('new Parser');
|
||||
// $sql->db_Mark_Time('------ End Parser Test -------');
|
||||
@ -3286,8 +3368,13 @@ class e_parser
|
||||
public function cleanHtml($html='', $checkPref = true)
|
||||
{
|
||||
if(empty($html)){ return ''; }
|
||||
|
||||
// $html = mb_convert_encoding($html, 'UTF-8');
|
||||
|
||||
|
||||
$html = mb_convert_encoding($html, 'UTF-8');
|
||||
|
||||
// $html = mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8');
|
||||
|
||||
|
||||
|
||||
if(preg_match("/<body/i",$html)!==true) // HTML Fragment
|
||||
{
|
||||
@ -3319,8 +3406,9 @@ class e_parser
|
||||
// Set it up for processing.
|
||||
$doc = $this->domObj;
|
||||
|
||||
@$doc->loadHTML($html);
|
||||
$doc->encoding = 'UTF-8'; //FIXME
|
||||
@$doc->loadHTML($html);
|
||||
// $doc->encoding = 'UTF-8';
|
||||
|
||||
// $doc->resolveExternals = true;
|
||||
|
||||
// $tmp = $doc->getElementsByTagName('*');
|
||||
@ -3427,15 +3515,15 @@ class e_parser
|
||||
$value = substr($value,0,$end);
|
||||
}
|
||||
|
||||
$value = htmlentities(htmlentities($value)); // Needed
|
||||
$value = htmlentities(htmlentities($value)); // Needed
|
||||
$node->nodeValue = $value;
|
||||
}
|
||||
|
||||
$cleaned = $doc->saveHTML();
|
||||
$cleaned = $doc->saveHTML($doc->documentElement); // $doc->documentElement fixes utf-8/entities issue. @see http://stackoverflow.com/questions/8218230/php-domdocument-loadhtml-not-encoding-utf-8-correctly
|
||||
|
||||
$cleaned = str_replace(array('<body>','</body>','<html>','</html>','<!DOCTYPE html>','<meta charset="UTF-8">','<?xml version="1.0" encoding="utf-8"?>'),'',$cleaned); // filter out tags.
|
||||
|
||||
$cleaned = html_entity_decode($cleaned, ENT_QUOTES, 'UTF-8');
|
||||
// $cleaned = html_entity_decode($cleaned, ENT_QUOTES, 'UTF-8');
|
||||
|
||||
return trim($cleaned);
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user