Fix for issue #1866.

2025-10-18 16:26:20 +02:00 · 2016-09-20 11:28:17 +02:00
parent 120949be04
commit 4ca9aa9a3f
1 changed files with 46 additions and 2 deletions
--- a/e107_handlers/date_handler.php
+++ b/e107_handlers/date_handler.php
@@ -197,7 +197,51 @@ class convert
 			break;
 		}

-		return utf8_encode(strftime($mask, $datestamp));
+		$dateString = strftime($mask, $datestamp);
+
+		if (!$this->detectUTF8($dateString)) {
+			$dateString = utf8_encode($dateString);
+		}
+
+		return $dateString;
+	}
+
+
+	/**
+	 * FIXME - find a better place for this function?
+	 *
+	 * Checks whether a string is entirely valid UTF-8.
+	 *
+	 * Plain ASCII and the empty string count as valid UTF-8. A string that
+	 * contains even one byte sequence that is not well-formed UTF-8 (for
+	 * example ISO-8859-1 output from strftime()) is rejected, so callers can
+	 * safely decide whether the value still needs utf8_encode().
+	 *
+	 * mb_check_encoding() is used when the mbstring extension is loaded.
+	 * mb_detect_encoding() is deliberately avoided: without its strict flag
+	 * it returns the closest matching encoding rather than validating, and
+	 * it typically reports pure ASCII as "ASCII" rather than "UTF-8".
+	 *
+	 * Without mbstring, PCRE validates the subject itself: with the "u"
+	 * modifier preg_match() fails (returns false) on malformed UTF-8.
+	 *
+	 * @param string $string
+	 *    The string being checked.
+	 * @return bool
+	 *    Returns true if $string is valid UTF-8 and false otherwise
+	 *    (including when $string is not a string at all).
+	 */
+	function detectUTF8($string) {
+		// A failed strftime() yields false; a non-string is never UTF-8 text.
+		if (!is_string($string)) {
+			return false;
+		}
+
+		if (function_exists('mb_check_encoding')) {
+			return mb_check_encoding($string, 'UTF-8');
+		}
+
+		return preg_match('//u', $string) === 1;
 	}