mirror of
https://github.com/e107inc/e107.git
synced 2025-07-28 10:20:45 +02:00
Bugtracker #3963 - proper utf-8 truncate - thanks verant
This commit is contained in:
@@ -11,8 +11,8 @@
|
|||||||
| GNU General Public License (http://gnu.org).
|
| GNU General Public License (http://gnu.org).
|
||||||
|
|
|
|
||||||
| $Source: /cvs_backup/e107_0.8/e107_handlers/e_parse_class.php,v $
|
| $Source: /cvs_backup/e107_0.8/e107_handlers/e_parse_class.php,v $
|
||||||
| $Revision: 1.11 $
|
| $Revision: 1.12 $
|
||||||
| $Date: 2007-05-28 18:02:38 $
|
| $Date: 2007-06-06 19:28:25 $
|
||||||
| $Author: e107steved $
|
| $Author: e107steved $
|
||||||
+----------------------------------------------------------------------------+
|
+----------------------------------------------------------------------------+
|
||||||
*/
|
*/
|
||||||
@@ -290,6 +290,7 @@ class e_parse
|
|||||||
$tmp_pos = $pos-1;
|
$tmp_pos = $pos-1;
|
||||||
$pos++;
|
$pos++;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case ">" :
|
case ">" :
|
||||||
if($text{$pos-1} == "/")
|
if($text{$pos-1} == "/")
|
||||||
{
|
{
|
||||||
@@ -303,6 +304,7 @@ class e_parse
|
|||||||
$intag = FALSE;
|
$intag = FALSE;
|
||||||
$pos++;
|
$pos++;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case "&" :
|
case "&" :
|
||||||
if($text{$pos+1} == "#")
|
if($text{$pos+1} == "#")
|
||||||
{
|
{
|
||||||
@@ -334,16 +336,24 @@ class e_parse
|
|||||||
return $ret;
|
return $ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
function text_truncate($text, $len = 200, $more = "[more]") {
|
|
||||||
if(strlen($text) <= $len) {
|
// Truncate a string to a maximum length $len - append the string $more if it was truncated
|
||||||
return $text;
|
// Uses current CHARSET - for utf-8, returns $len characters rather than $len bytes
|
||||||
} else { // utf-8 compatible substr()
|
function text_truncate($text, $len = 200, $more = "[more]")
|
||||||
return preg_replace('#^(?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+){0,0}'.
|
{
|
||||||
|
if (strlen($text) <= $len) return $text; // Always valid
|
||||||
|
if (CHARSET !== 'utf-8') return substr($text,0,$len).$more; // Non-utf-8 - one byte per character - simple
|
||||||
|
|
||||||
|
// It's a utf-8 string here - don't know whether it's longer than allowed length yet
|
||||||
|
$ret = preg_replace('#^(?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+){0,0}'.
|
||||||
'((?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+){0,'.$len.'}).*#s',
|
'((?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+){0,'.$len.'}).*#s',
|
||||||
'$1',$text).$more;
|
'$1',$text);
|
||||||
}
|
// Now check the final length - need to count characters rather than bytes
|
||||||
|
if (preg_match_all('/[\x00-\x7F\xC0-\xFD]/', $ret, $dummy) > $len) $ret .= $more;
|
||||||
|
return $ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
function textclean ($text, $wrap=100)
|
function textclean ($text, $wrap=100)
|
||||||
{
|
{
|
||||||
$text = str_replace ("\n\n\n", "\n\n", $text);
|
$text = str_replace ("\n\n\n", "\n\n", $text);
|
||||||
|
Reference in New Issue
Block a user