1
0
mirror of https://github.com/e107inc/e107.git synced 2025-07-12 10:36:20 +02:00

Replace the ctype_digit() width check with intval() in the e_parse word-wrap code

work in progress
This commit is contained in:
marj
2009-08-08 14:14:39 +00:00
parent 6a3cb3456c
commit c2de40c75b

View File

@ -9,9 +9,9 @@
* Text processing and parsing functions * Text processing and parsing functions
* *
* $Source: /cvs_backup/e107_0.8/e107_handlers/e_parse_class.php,v $ * $Source: /cvs_backup/e107_0.8/e107_handlers/e_parse_class.php,v $
* $Revision: 1.55 $ * $Revision: 1.56 $
* $Date: 2009-07-23 15:29:07 $ * $Date: 2009-08-08 14:14:39 $
* $Author: secretr $ * $Author: marj_nl_fr $
* *
*/ */
if (!defined('e107_INIT')) { exit; } if (!defined('e107_INIT')) { exit; }
@ -387,10 +387,14 @@ class e_parse
$nobreak is a list of tags within which word wrap is to be inactive $nobreak is a list of tags within which word wrap is to be inactive
*/ */
//TODO handle htmlwrap somehow
return $str; return $str;
if (!ctype_digit($width)) return $str; // Don't wrap if non-numeric width // Don't wrap if non-numeric width
if ($width < 6) return $str; // Trap stupid wrap counts, as well $width = intval($width);
// And trap stupid wrap counts
if ($width < 6)
return $str;
// Transform protected element lists into arrays // Transform protected element lists into arrays
$nobreak = explode(" ", strtolower($nobreak)); $nobreak = explode(" ", strtolower($nobreak));
@ -406,7 +410,8 @@ class e_parse
// Is $str a UTF8 string? // Is $str a UTF8 string?
if ($utf || strtolower(CHARSET) == 'utf-8') if ($utf || strtolower(CHARSET) == 'utf-8')
{ // 0x1680, 0x180e, 0x2000-0x200a, 0x2028, 0x205f, 0x3000 are 'non-ASCII' Unicode UCS-4 codepoints - see http://www.unicode.org/Public/UNIDATA/UnicodeData.txt {
// 0x1680, 0x180e, 0x2000-0x200a, 0x2028, 0x205f, 0x3000 are 'non-ASCII' Unicode UCS-4 codepoints - see http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
// All convert to 3-byte utf-8 sequences: // All convert to 3-byte utf-8 sequences:
// 0x1680 0xe1 0x9a 0x80 // 0x1680 0xe1 0x9a 0x80
// 0x180e 0xe1 0xa0 0x8e // 0x180e 0xe1 0xa0 0x8e
@ -423,23 +428,26 @@ class e_parse
else else
{ {
$utf8 = ''; $utf8 = '';
$whiteSpace = '#(\s+)#'; // For non-utf-8, can use a simple match string // For non-utf-8, can use a simple match string
$whiteSpace = '#(\s+)#';
} }
// Start of the serious stuff - split into HTML tags and text between // Start of the serious stuff - split into HTML tags and text between
$content = preg_split('#(<.*?>)#mis', $str, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE ); $content = preg_split('#(<.*?'.'>)#mis', $str, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE );
foreach($content as $value) foreach($content as $value)
{ {
if ($value[0] == "<") if ($value[0] == "<")
{ // We are within an HTML tag {
// We are within an HTML tag
// Create a lowercase copy of this tag's contents // Create a lowercase copy of this tag's contents
$lvalue = strtolower(substr($value,1,-1)); $lvalue = strtolower(substr($value,1,-1));
if ($lvalue) if ($lvalue)
{ // Tag of non-zero length { // Tag of non-zero length
// If the first character is not a / then this is an opening tag // If the first character is not a / then this is an opening tag
if ($lvalue[0] != "/") if ($lvalue[0] != "/")
{ // Collect the tag name {
// Collect the tag name
preg_match("/^(\w*?)(\s|$)/", $lvalue, $t); preg_match("/^(\w*?)(\s|$)/", $lvalue, $t);
// If this is a protected element, activate the associated protection flag // If this is a protected element, activate the associated protection flag
@ -465,12 +473,14 @@ class e_parse
} }
else else
{ {
$value = ''; // Eliminate any empty tags altogether // Eliminate any empty tags altogether
$value = '';
} }
// Else if we're outside any tags, and with non-zero length string... // Else if we're outside any tags, and with non-zero length string...
} }
elseif ($value) elseif ($value)
{ // If unprotected... {
// If unprotected...
if (!count($innbk)) if (!count($innbk))
{ {
// Use the ACK (006) ASCII symbol to replace all HTML entities temporarily // Use the ACK (006) ASCII symbol to replace all HTML entities temporarily
@ -487,14 +497,16 @@ class e_parse
// echo "Split length ".strlen($sp).': '.substr($sp,20).'<br />'; // echo "Split length ".strlen($sp).': '.substr($sp,20).'<br />';
$loopCount = 0; $loopCount = 0;
while (strlen($sp) > $width) while (strlen($sp) > $width)
{ // Enough characters that we may need to do something. {
// Enough characters that we may need to do something.
$pulled = ''; $pulled = '';
if ($utf8) if ($utf8)
{ {
// Pull out a piece of the maximum permissible length // Pull out a piece of the maximum permissible length
if (preg_match('#^((?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+){0,'.$width.'})(.{0,1}).*#s',$sp,$matches) == 0) if (preg_match('#^((?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+){0,'.$width.'})(.{0,1}).*#s',$sp,$matches) == 0)
{ {
$value .= '[!<b>invalid utf-8: '.$sp.'<b>!]'; // Make any problems obvious for now // Make any problems obvious for now
$value .= '[!<b>invalid utf-8: '.$sp.'<b>!]';
$sp = ''; $sp = '';
} }
elseif (empty($matches[2])) elseif (empty($matches[2]))
@ -520,7 +532,8 @@ class e_parse
$loopCount++; $loopCount++;
if ($loopCount > 20) if ($loopCount > 20)
{ {
$value .= '[!<b>loop count exceeded: '.$sp.'</b>!]'; // Make any problems obvious for now // Make any problems obvious for now
$value .= '[!<b>loop count exceeded: '.$sp.'</b>!]';
$sp = ''; $sp = '';
} }
} }
@ -528,10 +541,13 @@ class e_parse
{ {
for ($i = min($width,strlen($sp)); $i > 0; $i--) for ($i = min($width,strlen($sp)); $i > 0; $i--)
{ {
if (strpos($lbrks,$sp[$i-1]) !== FALSE) break; // No speed advantage to defining match character // No speed advantage to defining match character
if (strpos($lbrks,$sp[$i-1]) !== FALSE)
break;
} }
if ($i == 0) if ($i == 0)
{ // No 'special' break boundary character found - break at the word boundary {
// No 'special' break boundary character found - force a break at the maximum width
$pulled = substr($sp,0,$width); $pulled = substr($sp,0,$width);
} }
else else
@ -542,10 +558,12 @@ class e_parse
if ($pulled) if ($pulled)
{ {
$value .= $pulled.$break; $value .= $pulled.$break;
$sp = substr($sp,strlen($pulled)); // Shorten $sp by whatever we've processed (will work even for utf-8) // Shorten $sp by whatever we've processed (will work even for utf-8)
$sp = substr($sp,strlen($pulled));
} }
} }
$value .= $sp; // Add in any residue // Add in any residue
$value .= $sp;
} }
// Put captured HTML entities back into the string // Put captured HTML entities back into the string
foreach ($ents[0] as $ent) $value = preg_replace("/\x06/", $ent, $value, 1); foreach ($ents[0] as $ent) $value = preg_replace("/\x06/", $ent, $value, 1);