From bff82ee5831c386a0efd3c49e38e43b9ab49b105 Mon Sep 17 00:00:00 2001 From: e107steved Date: Wed, 6 Jun 2007 19:28:25 +0000 Subject: [PATCH] Bugtracker #3963 - proper utf-8 truncate - thanks verant --- e107_handlers/e_parse_class.php | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/e107_handlers/e_parse_class.php b/e107_handlers/e_parse_class.php index 29ecd0611..d3eefb5e7 100644 --- a/e107_handlers/e_parse_class.php +++ b/e107_handlers/e_parse_class.php @@ -11,8 +11,8 @@ | GNU General Public License (http://gnu.org). | | $Source: /cvs_backup/e107_0.8/e107_handlers/e_parse_class.php,v $ -| $Revision: 1.11 $ -| $Date: 2007-05-28 18:02:38 $ +| $Revision: 1.12 $ +| $Date: 2007-06-06 19:28:25 $ | $Author: e107steved $ +----------------------------------------------------------------------------+ */ @@ -290,6 +290,7 @@ class e_parse $tmp_pos = $pos-1; $pos++; break; + case ">" : if($text{$pos-1} == "/") { @@ -303,6 +304,7 @@ class e_parse $intag = FALSE; $pos++; break; + case "&" : if($text{$pos+1} == "#") { @@ -334,16 +336,24 @@ class e_parse return $ret; } - function text_truncate($text, $len = 200, $more = "[more]") { - if(strlen($text) <= $len) { - return $text; - } else { // utf-8 compatible substr() - return preg_replace('#^(?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+){0,0}'. + + // Truncate a string to a maximum length $len - append the string $more if it was truncated + // Uses current CHARSET - for utf-8, returns $len characters rather than $len bytes + function text_truncate($text, $len = 200, $more = "[more]") + { + if (strlen($text) <= $len) return $text; // Always valid + if (CHARSET !== 'utf-8') return substr($text,0,$len).$more; // Non-utf-8 - one byte per character - simple + + // It's a utf-8 string here - don't know whether it's longer than allowed length yet + $ret = preg_replace('#^(?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+){0,0}'. 
'((?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+){0,'.$len.'}).*#s', - '$1',$text).$more; - } + '$1',$text); + // Now check the final length - need to count characters rather than bytes + if (preg_match_all('/[\x00-\x7F\xC0-\xFD]/', $ret, $dummy) > $len) $ret .= $more; + return $ret; } + function textclean ($text, $wrap=100) { $text = str_replace ("\n\n\n", "\n\n", $text);