Issue: #1885 #1866 - Moved utf8 detection method to parser class.

2025-08-10 16:46:50 +02:00 · 2016-09-28 10:26:39 -07:00
parent 0d75bb2651
commit bb357b2035
2 changed files with 46 additions and 38 deletions
--- a/e107_handlers/date_handler.php
+++ b/e107_handlers/date_handler.php
@@ -142,7 +142,8 @@ class convert
 	 * 
 	 * @return string parsed date
 	 */
-	function convert_date($datestamp, $mask = '') {
+	function convert_date($datestamp, $mask = '')
+	{
 		if(empty($mask))
 		{
 			$mask = 'long';
@@ -199,7 +200,8 @@ class convert

 		$dateString = strftime($mask, $datestamp);

-		if (!$this->detectUTF8($dateString)) {
+		if (!e107::getParser()->isUTF8($dateString))
+		{
 			$dateString = utf8_encode($dateString);
 		}

@@ -207,42 +209,6 @@ class convert
 	}


-	/**
-	 * FIXME - find a better place for this function?
-	 *
-	 * Checks if string is valid UTF-8.
-	 *
-	 * Try to detect UTF-8 using mb_detect_encoding(). If mb string extension is
-	 * not installed, we try to use a simple UTF-8-ness checker using a regular
-	 * expression originally created by the W3C. But W3C's function scans the
-	 * entire strings and checks that it conforms to UTF-8.
-	 *
-	 * @see http://w3.org/International/questions/qa-forms-utf-8.html
-	 *
-	 * So this function is faster and less specific. It only looks for non-ascii
-	 * multibyte sequences in the UTF-8 range and also to stop once it finds at
-	 * least one multibytes string. This is quite a lot faster.
-	 *
-	 * @param $string
-	 *    The string being checked.
-	 * @return bool
-	 *    Returns true if $string is valid UTF-8 and false otherwise.
-	 */
-	function detectUTF8($string) {
-		if (function_exists('mb_detect_encoding')) {
-			return (mb_detect_encoding($string) == "UTF-8");
-		}
-
-		return (bool) preg_match('%(?:
-        [\xC2-\xDF][\x80-\xBF]        # non-overlong 2-byte
-        |\xE0[\xA0-\xBF][\x80-\xBF]               # excluding overlongs
-        |[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}      # straight 3-byte
-        |\xED[\x80-\x9F][\x80-\xBF]               # excluding surrogates
-        |\xF0[\x90-\xBF][\x80-\xBF]{2}    # planes 1-3
-        |[\xF1-\xF3][\x80-\xBF]{3}                  # planes 4-15
-        |\xF4[\x80-\x8F][\x80-\xBF]{2}    # plane 16
-        )+%xs', $string);
-	}


 	/**
--- a/e107_handlers/e_parse_class.php
+++ b/e107_handlers/e_parse_class.php
@@ -3818,6 +3818,48 @@ class e_parser



+
+	/**
+	 * Reports whether a string appears to be UTF-8 encoded.
+	 *
+	 * When the mbstring extension is available, mb_detect_encoding() is run in
+	 * strict mode over the configured detection order. Strict mode matters: in
+	 * the non-strict default the "closest" encoding is returned, so invalid
+	 * byte sequences (e.g. ISO-8859-1 text) could be reported as UTF-8.
+	 *
+	 * Without mbstring, a reduced form of the W3C UTF-8 regular expression is
+	 * used. It only searches for at least one well-formed multibyte sequence
+	 * and does not validate the rest of the string.
+	 *
+	 * @see http://w3.org/International/questions/qa-forms-utf-8.html
+	 *
+	 * @param $string string  string being checked.
+	 * @return bool  true when UTF-8 is detected. The regex fallback returns
+	 *               false for plain ASCII, which has no multibyte sequence.
+	 */
+	public function isUTF8($string)
+	{
+		if (function_exists('mb_detect_encoding'))
+		{
+			return (mb_detect_encoding($string, mb_detect_order(), true) === "UTF-8");
+		}
+
+		return (bool) preg_match('%(?:
+        [\xC2-\xDF][\x80-\xBF]        # non-overlong 2-byte
+        |\xE0[\xA0-\xBF][\x80-\xBF]               # excluding overlongs
+        |[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}      # straight 3-byte
+        |\xED[\x80-\x9F][\x80-\xBF]               # excluding surrogates
+        |\xF0[\x90-\xBF][\x80-\xBF]{2}    # planes 1-3
+        |[\xF1-\xF3][\x80-\xBF]{3}                  # planes 4-15
+        |\xF4[\x80-\x8F][\x80-\xBF]{2}    # plane 16
+        )+%xs', $string);
+	}
+
+
+
+
+
+
 	/**
 	 * Check if a file is an video or not. 
 	 * @param $file string