1
0
mirror of https://github.com/e107inc/e107.git synced 2025-07-28 02:10:46 +02:00

Bugtracker #3963 - proper utf-8 truncate - thanks verant

This commit is contained in:
e107steved
2007-06-06 19:28:25 +00:00
parent 1f6c9ec4d1
commit bff82ee583

View File

@@ -11,8 +11,8 @@
| GNU General Public License (http://gnu.org). | GNU General Public License (http://gnu.org).
| |
| $Source: /cvs_backup/e107_0.8/e107_handlers/e_parse_class.php,v $ | $Source: /cvs_backup/e107_0.8/e107_handlers/e_parse_class.php,v $
| $Revision: 1.11 $ | $Revision: 1.12 $
| $Date: 2007-05-28 18:02:38 $ | $Date: 2007-06-06 19:28:25 $
| $Author: e107steved $ | $Author: e107steved $
+----------------------------------------------------------------------------+ +----------------------------------------------------------------------------+
*/ */
@@ -290,6 +290,7 @@ class e_parse
$tmp_pos = $pos-1; $tmp_pos = $pos-1;
$pos++; $pos++;
break; break;
case ">" : case ">" :
if($text{$pos-1} == "/") if($text{$pos-1} == "/")
{ {
@@ -303,6 +304,7 @@ class e_parse
$intag = FALSE; $intag = FALSE;
$pos++; $pos++;
break; break;
case "&" : case "&" :
if($text{$pos+1} == "#") if($text{$pos+1} == "#")
{ {
@@ -334,16 +336,24 @@ class e_parse
return $ret; return $ret;
} }
// Truncate a string to a maximum length of $len characters.
//   $text - the string to shorten
//   $len  - maximum number of characters to keep (bytes for single-byte charsets)
//   $more - marker appended to the result ONLY when something was actually cut off
// Uses the current CHARSET - for utf-8, keeps $len characters rather than $len bytes,
// so a multi-byte character is never split in half.
// Returns $text unchanged when it already fits, otherwise the shortened text followed by $more.
function text_truncate($text, $len = 200, $more = "[more]")
{
	// $len is spliced into a regex below - make sure it is a plain integer
	$len = (int) $len;

	// No more bytes than the limit means no more characters than the limit either - always valid
	if (strlen($text) <= $len) return $text;

	// Non-utf-8 - one byte per character, and we already know the text is too long
	if (strtolower(CHARSET) !== 'utf-8') return substr($text, 0, $len).$more;

	// It's a utf-8 string with more than $len BYTES - it may still be within $len CHARACTERS.
	// Take up to $len characters from the start, where a character is either a single
	// 7-bit byte or a lead byte followed by its continuation bytes.
	if (!preg_match('#^(?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+){0,'.$len.'}#', $text, $match))
	{
		// The pattern could not be applied (e.g. $len outside what PCRE accepts as a repeat count).
		// Degrade to a byte-wise cut rather than handing back an empty/null result.
		return substr($text, 0, $len).$more;
	}
	$ret = $match[0];

	// Only flag the result as truncated if part of the input really was dropped.
	// (Counting the characters of $ret can't tell us that - it never exceeds $len.)
	if (strlen($ret) < strlen($text)) $ret .= $more;
	return $ret;
}
function textclean ($text, $wrap=100) function textclean ($text, $wrap=100)
{ {
$text = str_replace ("\n\n\n", "\n\n", $text); $text = str_replace ("\n\n\n", "\n\n", $text);