1
0
mirror of https://github.com/e107inc/e107.git synced 2025-01-29 10:38:08 +01:00

Issue: #1885 #1866 - Moved utf8 detection method to parser class.

This commit is contained in:
Cameron 2016-09-28 10:26:39 -07:00
parent 0d75bb2651
commit bb357b2035
2 changed files with 46 additions and 38 deletions

View File

@ -142,7 +142,8 @@ class convert
*
* @return string parsed date
*/
function convert_date($datestamp, $mask = '') {
function convert_date($datestamp, $mask = '')
{
if(empty($mask))
{
$mask = 'long';
@ -199,7 +200,8 @@ class convert
$dateString = strftime($mask, $datestamp);
if (!$this->detectUTF8($dateString)) {
if (!e107::getParser()->isUTF8($dateString))
{
$dateString = utf8_encode($dateString);
}
@ -207,42 +209,6 @@ class convert
}
/**
 * FIXME - find a better place for this function?
 *
 * Checks whether a string is valid UTF-8.
 *
 * The whole string is validated. When the mbstring extension is available,
 * mb_check_encoding() is used; otherwise PCRE's built-in UTF-8 validation
 * (the "u" pattern modifier) serves as the fallback. Both paths agree:
 * plain ASCII and the empty string are valid UTF-8 and yield true, while
 * any stray non-UTF-8 byte yields false.
 *
 * mb_detect_encoding() is deliberately not used: in its default non-strict
 * mode it guesses an encoding instead of validating, so legacy single-byte
 * strings (e.g. a locale date string ending in "\xE9") could be reported as
 * UTF-8 and left unconverted by callers.
 *
 * @param string $string
 *   The string being checked.
 * @return bool
 *   Returns true if $string is valid UTF-8 and false otherwise.
 */
function detectUTF8($string)
{
	// Normalise scalars/null up front so both validation paths see a string.
	$string = (string) $string;

	if (function_exists('mb_check_encoding'))
	{
		return mb_check_encoding($string, 'UTF-8');
	}

	// With the "u" modifier PCRE validates the subject as UTF-8 before
	// matching; the empty pattern matches every valid subject, and an
	// invalid subject makes preg_match() return false instead of 1.
	return preg_match('//u', $string) === 1;
}
/**

View File

@ -3818,6 +3818,48 @@ class e_parser
/**
 * Checks whether a string is valid UTF-8.
 *
 * The whole string is validated. When the mbstring extension is available,
 * mb_check_encoding() is used; otherwise PCRE's built-in UTF-8 validation
 * (the "u" pattern modifier) serves as the fallback. Both paths agree:
 * plain ASCII and the empty string are valid UTF-8 and yield true, while
 * any stray non-UTF-8 byte yields false.
 *
 * mb_detect_encoding() is deliberately not used: in its default non-strict
 * mode it guesses an encoding instead of validating, so legacy single-byte
 * strings (e.g. a locale date string ending in "\xE9") could be reported as
 * UTF-8 and left unconverted by callers.
 *
 * @param string $string The string being checked.
 * @return bool Returns true if $string is valid UTF-8 and false otherwise.
 */
public function isUTF8($string)
{
	// Normalise scalars/null up front so both validation paths see a string.
	$string = (string) $string;

	if (function_exists('mb_check_encoding'))
	{
		return mb_check_encoding($string, 'UTF-8');
	}

	// With the "u" modifier PCRE validates the subject as UTF-8 before
	// matching; the empty pattern matches every valid subject, and an
	// invalid subject makes preg_match() return false instead of 1.
	return preg_match('//u', $string) === 1;
}
/**
* Check if a file is a video or not.
* @param $file string