mirror of
https://github.com/e107inc/e107.git
synced 2025-05-06 12:15:38 +02:00
ctype_digit() replacement
work in progress
This commit is contained in:
parent
6a3cb3456c
commit
c2de40c75b
@ -9,9 +9,9 @@
|
|||||||
* Text processing and parsing functions
|
* Text processing and parsing functions
|
||||||
*
|
*
|
||||||
* $Source: /cvs_backup/e107_0.8/e107_handlers/e_parse_class.php,v $
|
* $Source: /cvs_backup/e107_0.8/e107_handlers/e_parse_class.php,v $
|
||||||
* $Revision: 1.55 $
|
* $Revision: 1.56 $
|
||||||
* $Date: 2009-07-23 15:29:07 $
|
* $Date: 2009-08-08 14:14:39 $
|
||||||
* $Author: secretr $
|
* $Author: marj_nl_fr $
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
if (!defined('e107_INIT')) { exit; }
|
if (!defined('e107_INIT')) { exit; }
|
||||||
@ -387,10 +387,14 @@ class e_parse
|
|||||||
$nobreak is a list of tags within which word wrap is to be inactive
|
$nobreak is a list of tags within which word wrap is to be inactive
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
//TODO handle htmlwrap somehow
|
||||||
return $str;
|
return $str;
|
||||||
|
|
||||||
if (!ctype_digit($width)) return $str; // Don't wrap if non-numeric width
|
// Don't wrap if non-numeric width
|
||||||
if ($width < 6) return $str; // Trap stupid wrap counts, as well
|
$width = intval($width);
|
||||||
|
// And trap stupid wrap counts
|
||||||
|
if ($width < 6)
|
||||||
|
return $str;
|
||||||
|
|
||||||
// Transform protected element lists into arrays
|
// Transform protected element lists into arrays
|
||||||
$nobreak = explode(" ", strtolower($nobreak));
|
$nobreak = explode(" ", strtolower($nobreak));
|
||||||
@ -406,7 +410,8 @@ class e_parse
|
|||||||
|
|
||||||
// Is $str a UTF8 string?
|
// Is $str a UTF8 string?
|
||||||
if ($utf || strtolower(CHARSET) == 'utf-8')
|
if ($utf || strtolower(CHARSET) == 'utf-8')
|
||||||
{ // 0x1680, 0x180e, 0x2000-0x200a, 0x2028, 0x205f, 0x3000 are 'non-ASCII' Unicode UCS-4 codepoints - see http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
|
{
|
||||||
|
// 0x1680, 0x180e, 0x2000-0x200a, 0x2028, 0x205f, 0x3000 are 'non-ASCII' Unicode UCS-4 codepoints - see http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
|
||||||
// All convert to 3-byte utf-8 sequences:
|
// All convert to 3-byte utf-8 sequences:
|
||||||
// 0x1680 0xe1 0x9a 0x80
|
// 0x1680 0xe1 0x9a 0x80
|
||||||
// 0x180e 0xe1 0xa0 0x8e
|
// 0x180e 0xe1 0xa0 0x8e
|
||||||
@ -423,133 +428,146 @@ class e_parse
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
$utf8 = '';
|
$utf8 = '';
|
||||||
$whiteSpace = '#(\s+)#'; // For non-utf-8, can use a simple match string
|
// For non-utf-8, can use a simple match string
|
||||||
|
$whiteSpace = '#(\s+)#';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Start of the serious stuff - split into HTML tags and text between
|
// Start of the serious stuff - split into HTML tags and text between
|
||||||
$content = preg_split('#(<.*?>)#mis', $str, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE );
|
$content = preg_split('#(<.*?'.'>)#mis', $str, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE );
|
||||||
foreach($content as $value)
|
foreach($content as $value)
|
||||||
{
|
{
|
||||||
if ($value[0] == "<")
|
if ($value[0] == "<")
|
||||||
{ // We are within an HTML tag
|
{
|
||||||
// Create a lowercase copy of this tag's contents
|
// We are within an HTML tag
|
||||||
$lvalue = strtolower(substr($value,1,-1));
|
// Create a lowercase copy of this tag's contents
|
||||||
if ($lvalue)
|
$lvalue = strtolower(substr($value,1,-1));
|
||||||
{ // Tag of non-zero length
|
if ($lvalue)
|
||||||
// If the first character is not a / then this is an opening tag
|
{ // Tag of non-zero length
|
||||||
if ($lvalue[0] != "/")
|
// If the first character is not a / then this is an opening tag
|
||||||
{ // Collect the tag name
|
if ($lvalue[0] != "/")
|
||||||
preg_match("/^(\w*?)(\s|$)/", $lvalue, $t);
|
{
|
||||||
|
// Collect the tag name
|
||||||
|
preg_match("/^(\w*?)(\s|$)/", $lvalue, $t);
|
||||||
|
|
||||||
// If this is a protected element, activate the associated protection flag
|
// If this is a protected element, activate the associated protection flag
|
||||||
if (in_array($t[1], $nobreak)) array_unshift($innbk, $t[1]);
|
if (in_array($t[1], $nobreak)) array_unshift($innbk, $t[1]);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{ // Otherwise this is a closing tag
|
||||||
|
// If this is a closing tag for a protected element, unset the flag
|
||||||
|
if (in_array(substr($lvalue, 1), $nobreak))
|
||||||
|
{
|
||||||
|
reset($innbk);
|
||||||
|
while (list($key, $tag) = each($innbk))
|
||||||
|
{
|
||||||
|
if (substr($lvalue, 1) == $tag)
|
||||||
|
{
|
||||||
|
unset($innbk[$key]);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
$innbk = array_values($innbk);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{ // Otherwise this is a closing tag
|
{
|
||||||
// If this is a closing tag for a protected element, unset the flag
|
// Eliminate any empty tags altogether
|
||||||
if (in_array(substr($lvalue, 1), $nobreak))
|
$value = '';
|
||||||
{
|
|
||||||
reset($innbk);
|
|
||||||
while (list($key, $tag) = each($innbk))
|
|
||||||
{
|
|
||||||
if (substr($lvalue, 1) == $tag)
|
|
||||||
{
|
|
||||||
unset($innbk[$key]);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
$innbk = array_values($innbk);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
// Else if we're outside any tags, and with non-zero length string...
|
||||||
else
|
|
||||||
{
|
|
||||||
$value = ''; // Eliminate any empty tags altogether
|
|
||||||
}
|
|
||||||
// Else if we're outside any tags, and with non-zero length string...
|
|
||||||
}
|
}
|
||||||
elseif ($value)
|
elseif ($value)
|
||||||
{ // If unprotected...
|
{
|
||||||
if (!count($innbk))
|
// If unprotected...
|
||||||
{
|
if (!count($innbk))
|
||||||
// Use the ACK (006) ASCII symbol to replace all HTML entities temporarily
|
|
||||||
$value = str_replace("\x06", "", $value);
|
|
||||||
preg_match_all("/&([a-z\d]{2,7}|#\d{2,5});/i", $value, $ents);
|
|
||||||
$value = preg_replace("/&([a-z\d]{2,7}|#\d{2,5});/i", "\x06", $value);
|
|
||||||
// echo "Found block length ".strlen($value).': '.substr($value,20).'<br />';
|
|
||||||
// Split at spaces - note that this will fail if presented with invalid utf-8 when doing the regex whitespace search
|
|
||||||
// $split = preg_split('#(\s)#'.$utf8, $value, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE );
|
|
||||||
$split = preg_split($whiteSpace, $value, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE );
|
|
||||||
$value = '';
|
|
||||||
foreach ($split as $sp)
|
|
||||||
{
|
{
|
||||||
// echo "Split length ".strlen($sp).': '.substr($sp,20).'<br />';
|
// Use the ACK (006) ASCII symbol to replace all HTML entities temporarily
|
||||||
$loopCount = 0;
|
$value = str_replace("\x06", "", $value);
|
||||||
while (strlen($sp) > $width)
|
preg_match_all("/&([a-z\d]{2,7}|#\d{2,5});/i", $value, $ents);
|
||||||
{ // Enough characters that we may need to do something.
|
$value = preg_replace("/&([a-z\d]{2,7}|#\d{2,5});/i", "\x06", $value);
|
||||||
$pulled = '';
|
// echo "Found block length ".strlen($value).': '.substr($value,20).'<br />';
|
||||||
if ($utf8)
|
// Split at spaces - note that this will fail if presented with invalid utf-8 when doing the regex whitespace search
|
||||||
|
// $split = preg_split('#(\s)#'.$utf8, $value, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE );
|
||||||
|
$split = preg_split($whiteSpace, $value, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE );
|
||||||
|
$value = '';
|
||||||
|
foreach ($split as $sp)
|
||||||
|
{
|
||||||
|
// echo "Split length ".strlen($sp).': '.substr($sp,20).'<br />';
|
||||||
|
$loopCount = 0;
|
||||||
|
while (strlen($sp) > $width)
|
||||||
{
|
{
|
||||||
// Pull out a piece of the maximum permissible length
|
// Enough characters that we may need to do something.
|
||||||
if (preg_match('#^((?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+){0,'.$width.'})(.{0,1}).*#s',$sp,$matches) == 0)
|
$pulled = '';
|
||||||
{
|
if ($utf8)
|
||||||
$value .= '[!<b>invalid utf-8: '.$sp.'<b>!]'; // Make any problems obvious for now
|
|
||||||
$sp = '';
|
|
||||||
}
|
|
||||||
elseif (empty($matches[2]))
|
|
||||||
{ // utf-8 length is less than specified - treat as a special case
|
|
||||||
$value .= $sp;
|
|
||||||
$sp = '';
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{ // Need to find somewhere to break the string
|
|
||||||
for ($i = strlen($matches[1])-1; $i >= 0; $i--)
|
|
||||||
{
|
{
|
||||||
if (strpos($lbrks,$matches[1][$i]) !== FALSE) break;
|
// Pull out a piece of the maximum permissible length
|
||||||
}
|
if (preg_match('#^((?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+){0,'.$width.'})(.{0,1}).*#s',$sp,$matches) == 0)
|
||||||
if ($i < 0)
|
{
|
||||||
{ // No 'special' break character found - break at the word boundary
|
// Make any problems obvious for now
|
||||||
$pulled = $matches[1];
|
$value .= '[!<b>invalid utf-8: '.$sp.'<b>!]';
|
||||||
|
$sp = '';
|
||||||
|
}
|
||||||
|
elseif (empty($matches[2]))
|
||||||
|
{ // utf-8 length is less than specified - treat as a special case
|
||||||
|
$value .= $sp;
|
||||||
|
$sp = '';
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{ // Need to find somewhere to break the string
|
||||||
|
for ($i = strlen($matches[1])-1; $i >= 0; $i--)
|
||||||
|
{
|
||||||
|
if (strpos($lbrks,$matches[1][$i]) !== FALSE) break;
|
||||||
|
}
|
||||||
|
if ($i < 0)
|
||||||
|
{ // No 'special' break character found - break at the word boundary
|
||||||
|
$pulled = $matches[1];
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
$pulled = substr($sp,0,$i+1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
$loopCount++;
|
||||||
|
if ($loopCount > 20)
|
||||||
|
{
|
||||||
|
// Make any problems obvious for now
|
||||||
|
$value .= '[!<b>loop count exceeded: '.$sp.'</b>!]';
|
||||||
|
$sp = '';
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
$pulled = substr($sp,0,$i+1);
|
for ($i = min($width,strlen($sp)); $i > 0; $i--)
|
||||||
|
{
|
||||||
|
// No speed advantage to defining match character
|
||||||
|
if (strpos($lbrks,$sp[$i-1]) !== FALSE)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if ($i == 0)
|
||||||
|
{
|
||||||
|
// No 'special' break boundary character found - break at the word boundary
|
||||||
|
$pulled = substr($sp,0,$width);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
$pulled = substr($sp,0,$i);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
if ($pulled)
|
||||||
$loopCount++;
|
|
||||||
if ($loopCount > 20)
|
|
||||||
{
|
|
||||||
$value .= '[!<b>loop count exceeded: '.$sp.'</b>!]'; // Make any problems obvious for now
|
|
||||||
$sp = '';
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
for ($i = min($width,strlen($sp)); $i > 0; $i--)
|
|
||||||
{
|
{
|
||||||
if (strpos($lbrks,$sp[$i-1]) !== FALSE) break; // No speed advantage to defining match character
|
$value .= $pulled.$break;
|
||||||
}
|
// Shorten $sp by whatever we've processed (will work even for utf-8)
|
||||||
if ($i == 0)
|
$sp = substr($sp,strlen($pulled));
|
||||||
{ // No 'special' break boundary character found - break at the word boundary
|
|
||||||
$pulled = substr($sp,0,$width);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
$pulled = substr($sp,0,$i);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if ($pulled)
|
// Add in any residue
|
||||||
{
|
$value .= $sp;
|
||||||
$value .= $pulled.$break;
|
|
||||||
$sp = substr($sp,strlen($pulled)); // Shorten $sp by whatever we've processed (will work even for utf-8)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
$value .= $sp; // Add in any residue
|
// Put captured HTML entities back into the string
|
||||||
|
foreach ($ents[0] as $ent) $value = preg_replace("/\x06/", $ent, $value, 1);
|
||||||
}
|
}
|
||||||
// Put captured HTML entities back into the string
|
|
||||||
foreach ($ents[0] as $ent) $value = preg_replace("/\x06/", $ent, $value, 1);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
// Send the modified segment down the drain
|
// Send the modified segment down the drain
|
||||||
$drain .= $value;
|
$drain .= $value;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user