1
0
mirror of https://github.com/e107inc/e107.git synced 2025-07-28 02:10:46 +02:00

Bugtracker #3963 - proper utf-8 truncate - thanks verant

This commit is contained in:
e107steved
2007-06-06 19:28:25 +00:00
parent 1f6c9ec4d1
commit bff82ee583

View File

@@ -11,8 +11,8 @@
| GNU General Public License (http://gnu.org).
|
| $Source: /cvs_backup/e107_0.8/e107_handlers/e_parse_class.php,v $
| $Revision: 1.11 $
| $Date: 2007-05-28 18:02:38 $
| $Revision: 1.12 $
| $Date: 2007-06-06 19:28:25 $
| $Author: e107steved $
+----------------------------------------------------------------------------+
*/
@@ -290,6 +290,7 @@ class e_parse
$tmp_pos = $pos-1;
$pos++;
break;
case ">" :
if($text{$pos-1} == "/")
{
@@ -303,6 +304,7 @@ class e_parse
$intag = FALSE;
$pos++;
break;
case "&" :
if($text{$pos+1} == "#")
{
@@ -334,16 +336,24 @@ class e_parse
return $ret;
}
/**
 * Truncate a string to a maximum length, appending $more only if text was actually cut.
 *
 * The limit is applied in bytes when CHARSET is not 'utf-8' and in characters
 * (one lead byte plus its continuation bytes) when CHARSET is 'utf-8', so a
 * multi-byte character is never split.
 *
 * @param string $text Text to shorten.
 * @param int    $len  Maximum length to keep (bytes, or characters for utf-8).
 * @param string $more Marker appended when the text has been truncated.
 * @return string The original text if it fits, otherwise the kept prefix followed by $more.
 */
function text_truncate($text, $len = 200, $more = "[more]")
{
	// The length is interpolated into a regex quantifier below - keep it a non-negative integer
	$len = max(0, (int) $len);

	// Byte length within the limit means the character length is too - always valid
	if (strlen($text) <= $len) return $text;

	// Non-utf-8 - one byte per character - simple
	if (CHARSET !== 'utf-8') return substr($text, 0, $len).$more;

	// utf-8: more bytes than $len, but not necessarily more characters.
	// Capture at most $len whole characters from the start of the string.
	$found = preg_match('#^((?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+){0,'.$len.'})#s', $text, $matches);
	if ($found !== 1)
	{
		// Pattern could not be applied (e.g. $len beyond the PCRE quantifier limit) -
		// return the text untouched rather than risk cutting through a character
		return $text;
	}

	$ret = $matches[1];

	// The captured prefix holds at most $len characters, so counting its characters can never
	// reveal a truncation. Compare byte lengths instead: a shorter prefix means text was dropped.
	if (strlen($ret) < strlen($text)) $ret .= $more;
	return $ret;
}
function textclean ($text, $wrap=100)
{
$text = str_replace ("\n\n\n", "\n\n", $text);