Bugtracker #3963 - proper utf-8 truncate - thanks verant

2025-10-10 20:44:48 +02:00 · 2007-06-06 19:28:25 +00:00
parent 1f6c9ec4d1
commit bff82ee583
1 changed files with 19 additions and 9 deletions
--- a/e107_handlers/e_parse_class.php
+++ b/e107_handlers/e_parse_class.php
@@ -11,8 +11,8 @@
 |     GNU General Public License (http://gnu.org).
 |
 |     $Source: /cvs_backup/e107_0.8/e107_handlers/e_parse_class.php,v $
-|     $Revision: 1.11 $
-|     $Date: 2007-05-28 18:02:38 $
+|     $Revision: 1.12 $
+|     $Date: 2007-06-06 19:28:25 $
 |     $Author: e107steved $
 +----------------------------------------------------------------------------+
 */
@@ -290,6 +290,7 @@ class e_parse
 				$tmp_pos = $pos-1;
 				$pos++;
 				break;
+				
 				case ">" :
 				if($text{$pos-1} == "/")
 				{
@@ -303,6 +304,7 @@ class e_parse
 				$intag = FALSE;
 				$pos++;
 				break;
+				
 				case "&" :
 				if($text{$pos+1} == "#")
 				{
@@ -334,16 +336,24 @@ class e_parse
 		return $ret;
 	}

-	function text_truncate($text, $len = 200, $more = "[more]") {
-		if(strlen($text) <= $len) {
-			return $text;
-		} else { // utf-8 compatible substr()
-            return preg_replace('#^(?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+){0,0}'.
+
+	// Truncate $text to at most $len characters - append the string $more only if something was actually cut off
+	// Uses current CHARSET - for utf-8, returns $len characters rather than $len bytes
+	function text_truncate($text, $len = 200, $more = "[more]") 
+	{
+	  if (strlen($text) <= $len) return $text; 		// Byte count is never below character count, so nothing to cut
+	  if (CHARSET !== 'utf-8') return substr($text,0,$len).$more;	// Non-utf-8 - one byte per character - simple
+  
+	  // It's a utf-8 string here - more than $len bytes, but not necessarily more than $len characters
+	  $ret = preg_replace('#^(?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+){0,0}'.
 					'((?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+){0,'.$len.'}).*#s',
-					'$1',$text).$more;
-		}
+					'$1',$text);
+	  // $ret is a leading part of $text of at most $len characters, so a character count can never exceed $len - compare byte lengths instead
+	  if (strlen($ret) < strlen($text)) $ret .= $more;
+	  return $ret;
 	}

+
 	function textclean ($text, $wrap=100)
 	{
 		$text = str_replace ("\n\n\n", "\n\n", $text);