ctype_digit() replacement

work in progress
2025-10-13 05:55:02 +02:00 · 2009-08-08 14:14:39 +00:00
parent 6a3cb3456c
commit c2de40c75b
1 changed files with 126 additions and 108 deletions
--- a/e107_handlers/e_parse_class.php
+++ b/e107_handlers/e_parse_class.php
@@ -9,9 +9,9 @@
 * Text processing and parsing functions
 *
 * $Source: /cvs_backup/e107_0.8/e107_handlers/e_parse_class.php,v $
-* $Revision: 1.55 $
+* $Revision: 1.56 $
-* $Date: 2009-07-23 15:29:07 $
+* $Date: 2009-08-08 14:14:39 $
-* $Author: secretr $
+* $Author: marj_nl_fr $
 *
 */
 if (!defined('e107_INIT')) { exit; }
@@ -387,10 +387,14 @@ class e_parse
 		$nobreak is a list of tags within which word wrap is to be inactive
 		*/
 		//TODO handle htmlwrap somehow
 		return $str;
-		if (!ctype_digit($width)) return $str;		// Don't wrap if non-numeric width
+		// Don't wrap if non-numeric width
-		if ($width < 6) return $str;					// Trap stupid wrap counts, as well
+		$width = intval($width);
 		// And trap stupid wrap counts
 		if ($width < 6)
 			return $str;
 		// Transform protected element lists into arrays
 		$nobreak = explode(" ", strtolower($nobreak));
@@ -406,7 +410,8 @@ class e_parse
 		// Is $str a UTF8 string?
 		if ($utf || strtolower(CHARSET) == 'utf-8')
-		{	// 0x1680, 0x180e, 0x2000-0x200a, 0x2028, 0x205f, 0x3000 are 'non-ASCII' Unicode UCS-4 codepoints - see http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
+		{
 			// 0x1680, 0x180e, 0x2000-0x200a, 0x2028, 0x205f, 0x3000 are 'non-ASCII' Unicode UCS-4 codepoints - see http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
 			// All convert to 3-byte utf-8 sequences:
 			// 0x1680	0xe1	0x9a	0x80
 			// 0x180e	0xe1	0xa0	0x8e
@@ -423,133 +428,146 @@ class e_parse
 		else
 		{
 			$utf8 = '';
-			$whiteSpace = '#(\s+)#';		// For non-utf-8, can use a simple match string
+			// For non-utf-8, can use a simple match string
 			$whiteSpace = '#(\s+)#';
 		}
 		// Start of the serious stuff - split into HTML tags and text between
-		$content = preg_split('#(<.*?>)#mis', $str, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE );
+		$content = preg_split('#(<.*?'.'>)#mis', $str, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE );
 		foreach($content as $value)
 		{
 			if ($value[0] == "<")
-			{  // We are within an HTML tag
+			{
-			  // Create a lowercase copy of this tag's contents
+				// We are within an HTML tag
-			  $lvalue = strtolower(substr($value,1,-1));
+				// Create a lowercase copy of this tag's contents
-			  if ($lvalue)
+				$lvalue = strtolower(substr($value,1,-1));
-			  {	// Tag of non-zero length
+				if ($lvalue)
-				// If the first character is not a / then this is an opening tag
+				{	// Tag of non-zero length
-				if ($lvalue[0] != "/")
+					// If the first character is not a / then this is an opening tag
-				{            // Collect the tag name
+					if ($lvalue[0] != "/")
-				  preg_match("/^(\w*?)(\s|$)/", $lvalue, $t);
+					{
 						// Collect the tag name
 						preg_match("/^(\w*?)(\s|$)/", $lvalue, $t);
-				  // If this is a protected element, activate the associated protection flag
+						// If this is a protected element, activate the associated protection flag
-				  if (in_array($t[1], $nobreak)) array_unshift($innbk, $t[1]);
+						if (in_array($t[1], $nobreak)) array_unshift($innbk, $t[1]);
 					}
 					else
 					{  // Otherwise this is a closing tag
 						// If this is a closing tag for a protected element, unset the flag
 						if (in_array(substr($lvalue, 1), $nobreak))
 						{
 							reset($innbk);
 							while (list($key, $tag) = each($innbk))
 							{
 								if (substr($lvalue, 1) == $tag)
 								{
 									unset($innbk[$key]);
 									break;
 								}
 							}
 							$innbk = array_values($innbk);
 						}
 					}
 				}
 				else
-				{  // Otherwise this is a closing tag
+				{
-				  // If this is a closing tag for a protected element, unset the flag
+					// Eliminate any empty tags altogether
-				  if (in_array(substr($lvalue, 1), $nobreak))
+					$value = '';
 				  {
 					reset($innbk);
 					while (list($key, $tag) = each($innbk))
 					{
 					  if (substr($lvalue, 1) == $tag)
 					  {
 						unset($innbk[$key]);
 						break;
 					  }
 					}
 					$innbk = array_values($innbk);
 				  }
 				}
-			  }
+				// Else if we're outside any tags, and with non-zero length string...
 			  else
 			  {
 				$value = '';		// Eliminate any empty tags altogether
 			  }
 			// Else if we're outside any tags, and with non-zero length string...
 			}
 			elseif ($value)
-			{    // If unprotected...
+			{
-			  if (!count($innbk))
+				// If unprotected...
-			  {
+				if (!count($innbk))
 				// Use the ACK (006) ASCII symbol to replace all HTML entities temporarily
 				$value = str_replace("\x06", "", $value);
 				preg_match_all("/&([a-z\d]{2,7}|#\d{2,5});/i", $value, $ents);
 				$value = preg_replace("/&([a-z\d]{2,7}|#\d{2,5});/i", "\x06", $value);
 	//			echo "Found block length ".strlen($value).': '.substr($value,20).'<br />';
 				// Split at spaces - note that this will fail if presented with invalid utf-8 when doing the regex whitespace search
 	//			$split = preg_split('#(\s)#'.$utf8, $value, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE );
 				$split = preg_split($whiteSpace, $value, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE );
 				$value = '';
 				foreach ($split as $sp)
 				{
-	//			echo "Split length ".strlen($sp).': '.substr($sp,20).'<br />';
+					// Use the ACK (006) ASCII symbol to replace all HTML entities temporarily
-					$loopCount = 0;
+					$value = str_replace("\x06", "", $value);
-					while (strlen($sp) > $width)
+					preg_match_all("/&([a-z\d]{2,7}|#\d{2,5});/i", $value, $ents);
-					{	// Enough characters that we may need to do something.
+					$value = preg_replace("/&([a-z\d]{2,7}|#\d{2,5});/i", "\x06", $value);
-						$pulled = '';
+					//			echo "Found block length ".strlen($value).': '.substr($value,20).'<br />';
-						if ($utf8)
+					// Split at spaces - note that this will fail if presented with invalid utf-8 when doing the regex whitespace search
 					//			$split = preg_split('#(\s)#'.$utf8, $value, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE );
 					$split = preg_split($whiteSpace, $value, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE );
 					$value = '';
 					foreach ($split as $sp)
 					{
 						//			echo "Split length ".strlen($sp).': '.substr($sp,20).'<br />';
 						$loopCount = 0;
 						while (strlen($sp) > $width)
 						{
-						  // Pull out a piece of the maximum permissible length
+							// Enough characters that we may need to do something.
-						  if (preg_match('#^((?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+){0,'.$width.'})(.{0,1}).*#s',$sp,$matches) == 0)
+							$pulled = '';
-						  {
+							if ($utf8)
 							$value .= '[!<b>invalid utf-8: '.$sp.'<b>!]';		// Make any problems obvious for now
 							$sp = '';
 						  }
 						  elseif (empty($matches[2]))
 						  {  // utf-8 length is less than specified - treat as a special case
 							$value .= $sp;
 							$sp = '';
 						  }
 						  else
 						  {		// Need to find somewhere to break the string
 							for ($i = strlen($matches[1])-1; $i >= 0; $i--)
 							{
-							  if (strpos($lbrks,$matches[1][$i]) !== FALSE) break;
+								// Pull out a piece of the maximum permissible length
-							}
+								if (preg_match('#^((?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+){0,'.$width.'})(.{0,1}).*#s',$sp,$matches) == 0)
-							if ($i < 0)
+								{
-							{	// No 'special' break character found - break at the word boundary
+									// Make any problems obvious for now
-							  $pulled = $matches[1];
+									$value .= '[!<b>invalid utf-8: '.$sp.'<b>!]';
 									$sp = '';
 								}
 								elseif (empty($matches[2]))
 								{  // utf-8 length is less than specified - treat as a special case
 									$value .= $sp;
 									$sp = '';
 								}
 								else
 								{		// Need to find somewhere to break the string
 									for ($i = strlen($matches[1])-1; $i >= 0; $i--)
 									{
 										if (strpos($lbrks,$matches[1][$i]) !== FALSE) break;
 									}
 									if ($i < 0)
 									{	// No 'special' break character found - break at the word boundary
 										$pulled = $matches[1];
 									}
 									else
 									{
 										$pulled = substr($sp,0,$i+1);
 									}
 								}
 								$loopCount++;
 								if ($loopCount > 20)
 								{
 									// Make any problems obvious for now
 									$value .= '[!<b>loop count exceeded: '.$sp.'</b>!]';
 									$sp = '';
 								}
 							}
 							else
 							{
-							  $pulled = substr($sp,0,$i+1);
+								for ($i = min($width,strlen($sp)); $i > 0; $i--)
 								{
 									// No speed advantage to defining match character
 									if (strpos($lbrks,$sp[$i-1]) !== FALSE)
 										break;
 								}
 								if ($i == 0)
 								{
 									// No 'special' break boundary character found - break at the word boundary
 									$pulled = substr($sp,0,$width);
 								}
 								else
 								{
 									$pulled = substr($sp,0,$i);
 								}
 							}
-						  }
+							if ($pulled)
 						  $loopCount++;
 						  if ($loopCount > 20)
 						  {
 							$value .= '[!<b>loop count exceeded: '.$sp.'</b>!]';		// Make any problems obvious for now
 							$sp = '';
 						  }
 						}
 						else
 						{
 							for ($i = min($width,strlen($sp)); $i > 0; $i--)
 							{
-							  if (strpos($lbrks,$sp[$i-1]) !== FALSE) break;		// No speed advantage to defining match character
+								$value .= $pulled.$break;
-							}
+								// Shorten $sp by whatever we've processed (will work even for utf-8)
-							if ($i == 0)
+								$sp = substr($sp,strlen($pulled));
 							{	// No 'special' break boundary character found - break at the word boundary
 							  $pulled = substr($sp,0,$width);
 							}
 							else
 							{
 							  $pulled = substr($sp,0,$i);
 							}
 						}
-						if ($pulled)
+						// Add in any residue
-						{
+						$value .= $sp;
 						  $value .= $pulled.$break;
 						  $sp = substr($sp,strlen($pulled));			// Shorten $sp by whatever we've processed (will work even for utf-8)
 						}
 					}
-					$value .= $sp;		// Add in any residue
+					// Put captured HTML entities back into the string
 					foreach ($ents[0] as $ent) $value = preg_replace("/\x06/", $ent, $value, 1);
 				}
 				// Put captured HTML entities back into the string
 				foreach ($ents[0] as $ent) $value = preg_replace("/\x06/", $ent, $value, 1);
 			  }
 			}
 			// Send the modified segment down the drain
 			$drain .= $value;