1
0
mirror of https://github.com/phpbb/phpbb.git synced 2025-06-21 18:44:37 +02:00

commenting some code :D

git-svn-id: file:///svn/phpbb/trunk@6376 89ea8834-ac86-4346-8a33-228a782c2dd0
This commit is contained in:
David M
2006-09-17 02:52:19 +00:00
parent bbc4a0c3fe
commit c6c3df2a73

View File

@ -17,13 +17,15 @@
* @package phpBB3
*/
// huge chunks of this code belong to the PHP UTF-8 project
// TODO: document the functions!
// utf8_encode and utf8_decode are both XML functions
if (!extension_loaded('xml'))
{
// This function exploits some nice things that ISO-8859-1 and UTF-8 have in common
/**
* Implementation of PHP's native utf8_encode for people without XML support
* This function exploits some nice things that ISO-8859-1 and UTF-8 have in common
*
* @param string $str ISO-8859-1 encoded data
* @return string UTF-8 encoded data
*/
function utf8_encode($str)
{
$out = '';
@ -48,7 +50,13 @@ if (!extension_loaded('xml'))
return $out;
}
// "borrowed" from getID3
/**
* Implementation of PHP's native utf8_decode for people without XML support
*
* @author GetID3()
* @param string $string UTF-8 encoded data
* @return string ISO-8859-1 encoded data
*/
function utf8_decode($string)
{
$newcharstring = '';
@ -106,6 +114,16 @@ if (!extension_loaded('xml'))
// if mbstring is not loaded, we go into native mode.
if (extension_loaded('mbstring'))
{
/**
* UTF-8 aware alternative to strrpos
* Find position of last occurrence of a char in a string
*
* @author Harry Fuecks
* @param string haystack
* @param string needle
* @param integer (optional) offset (from left)
* @return mixed integer position or FALSE on failure
*/
function utf8_strrpos($str, $needle, $offset = null)
{
// offset for mb_strrpos was added in 5.2.0
@ -137,6 +155,16 @@ if (extension_loaded('mbstring'))
}
}
/**
* UTF-8 aware alternative to strpos
* Find position of first occurrence of a string
*
* @author Harry Fuecks
* @param string haystack
* @param string needle
* @param integer offset in characters (from left)
* @return mixed integer position or FALSE on failure
*/
function utf8_strpos($str, $needle, $offset = null)
{
if ($offset === false)
@ -149,16 +177,50 @@ if (extension_loaded('mbstring'))
}
}
/**
* UTF-8 aware alternative to strtolower
* Make a string lowercase
* Note: The concept of a character's "case" only exists in some alphabets
* such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
* not exist in the Chinese alphabet, for example. See Unicode Standard
* Annex #21: Case Mappings
*
* The encoding is passed explicitly (as utf8_strlen does) so the result
* does not depend on the current mb_internal_encoding() setting.
*
* @author Andreas Gohr <andi@splitbrain.org>
* @param string $str UTF-8 encoded string
* @return string the string converted to lowercase
*/
function utf8_strtolower($str)
{
	return mb_strtolower($str, 'utf-8');
}
/**
* UTF-8 aware alternative to strtoupper
* Make a string uppercase
* Note: The concept of a character's "case" only exists in some alphabets
* such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
* not exist in the Chinese alphabet, for example. See Unicode Standard
* Annex #21: Case Mappings
*
* The encoding is passed explicitly (as utf8_strlen does) so the result
* does not depend on the current mb_internal_encoding() setting.
*
* @author Andreas Gohr <andi@splitbrain.org>
* @param string $str UTF-8 encoded string
* @return string the string converted to uppercase
*/
function utf8_strtoupper($str)
{
	return mb_strtoupper($str, 'utf-8');
}
/**
* UTF-8 aware alternative to substr
* Return part of a string given character offset (and optionally length)
*
* @author Harry Fuecks
* @param string
* @param integer number of UTF-8 characters offset (from left)
* @param integer (optional) length in UTF-8 characters from offset
* @return mixed string or FALSE if failure
*/
function utf8_substr($str, $offset, $length = null)
{
if ($length === false)
@ -170,9 +232,30 @@ if (extension_loaded('mbstring'))
return mb_substr($str, $offset, $length);
}
}
/**
* Count the characters (not the bytes) of a UTF-8 string
*
* @param string $text UTF-8 string
* @return integer Number of characters in the given string
*/
function utf8_strlen($text)
{
	$char_count = mb_strlen($text, 'utf-8');

	return $char_count;
}
}
else
{
/**
* UTF-8 aware alternative to strrpos
* Find position of last occurrence of a char in a string
*
* @author Harry Fuecks
* @param string haystack
* @param string needle
* @param integer (optional) offset (from left)
* @return mixed integer position or FALSE on failure
*/
function utf8_strrpos($str, $needle, $offset = null)
{
if (is_null($offset))
@ -207,6 +290,16 @@ else
}
}
/**
* UTF-8 aware alternative to strpos
* Find position of first occurrence of a string
*
* @author Harry Fuecks
* @param string haystack
* @param string needle
* @param integer offset in characters (from left)
* @return mixed integer position or FALSE on failure
*/
function utf8_strpos($str, $needle, $offset = null)
{
// native
@ -330,6 +423,18 @@ $UTF8_LOWER_TO_UPPER = array(
0x00F0=>0x00D0, 0x0457=>0x0407, 0x0123=>0x0122,
);
/**
* UTF-8 aware alternative to strtolower
* Make a string lowercase
* Note: The concept of a character's "case" only exists in some alphabets
* such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
* not exist in the Chinese alphabet, for example. See Unicode Standard
* Annex #21: Case Mappings
*
* @author Andreas Gohr <andi@splitbrain.org>
* @param string
* @return mixed either string in lowercase or FALSE if the UTF-8 is invalid
*/
function utf8_strtolower($string)
{
global $UTF8_UPPER_TO_LOWER;
@ -351,6 +456,18 @@ $UTF8_LOWER_TO_UPPER = array(
return utf8_from_unicode($uni);
}
/**
* UTF-8 aware alternative to strtoupper
* Make a string uppercase
* Note: The concept of a character's "case" only exists in some alphabets
* such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
* not exist in the Chinese alphabet, for example. See Unicode Standard
* Annex #21: Case Mappings
*
* @author Andreas Gohr <andi@splitbrain.org>
* @param string
* @return mixed either string in uppercase or FALSE if the UTF-8 is invalid
*/
function utf8_strtoupper($str)
{
global $UTF8_LOWER_TO_UPPER;
@ -372,6 +489,16 @@ $UTF8_LOWER_TO_UPPER = array(
return utf8_from_unicode($uni);
}
/**
* UTF-8 aware alternative to substr
* Return part of a string given character offset (and optionally length)
*
* @author Harry Fuecks
* @param string
* @param integer number of UTF-8 characters offset (from left)
* @param integer (optional) length in UTF-8 characters from offset
* @return mixed string or FALSE if failure
*/
function utf8_substr($str, $offset, $length = null)
{
if ($offset >= 0 && $length >= 0)
@ -436,8 +563,30 @@ $UTF8_LOWER_TO_UPPER = array(
}
}
}
/**
* Count the characters (not the bytes) of a UTF-8 string
*
* @param string $text UTF-8 string
* @return integer Number of characters in the given string
*/
function utf8_strlen($text)
{
	// utf8_decode turns each multibyte character into a single byte
	// (a "?" when it cannot be represented), so counting bytes afterwards
	// yields the number of characters
	$single_byte = utf8_decode($text);

	return strlen($single_byte);
}
}
/**
* UTF-8 aware alternative to str_split
* Convert a string to an array
*
* @author Harry Fuecks
* @param string UTF-8 encoded
* @param int number of characters to split string by
* @return array the characters of the string, split into chunks
*/
function utf8_str_split($str, $split_len = 1)
{
if (!preg_match('/^[0-9]+$/', $split_len) || $split_len < 1)
@ -455,6 +604,14 @@ function utf8_str_split($str, $split_len = 1)
return $ar[0];
}
/**
* UTF-8 aware alternative to strspn
* Find length of initial segment matching mask
*
* @author Harry Fuecks
* @param string
* @return int
*/
function utf8_strspn($str, $mask, $start = null, $length = null)
{
$mask = preg_replace('!([\\\\\\-\\]\\[/^])!', '\\\${1}', $mask);
@ -474,6 +631,14 @@ function utf8_strspn($str, $mask, $start = null, $length = null)
return 0;
}
/**
* UTF-8 aware alternative to ucfirst
* Make a string's first character uppercase
*
* @author Harry Fuecks
* @param string
* @return string with first character as upper case (if applicable)
*/
function utf8_ucfirst($str)
{
switch (utf8_strlen($str))
@ -493,28 +658,6 @@ function utf8_ucfirst($str)
}
}
/**
* Count the characters (not the bytes) of a UTF-8 string
*
* Uses iconv when available, then mbstring, and finally a pure PHP fallback.
*
* @param string $text UTF-8 string
* @return integer Number of characters in the given string
*/
function utf8_strlen($text)
{
	if (function_exists('iconv_strlen'))
	{
		$char_count = iconv_strlen($text, 'utf-8');
	}
	else if (function_exists('mb_strlen'))
	{
		$char_count = mb_strlen($text, 'utf-8');
	}
	else
	{
		// utf8_decode turns each multibyte character into a single byte
		// (a "?" when it cannot be represented), so the byte count is the
		// character count
		$char_count = strlen(utf8_decode($text));
	}

	return $char_count;
}
/**
* Recode a string to UTF-8
*
@ -614,6 +757,12 @@ function utf8_encode_ncr_callback($m)
return '&#' . utf8_ord($m[0]) . ';';
}
/**
* Returns the Unicode code point of a single UTF-8 character
*
* @param string $chr UTF-8 char
* @return integer UNICODE code point
*/
function utf8_ord($chr)
{
switch (strlen($chr))
@ -639,6 +788,12 @@ function utf8_ord($chr)
}
}
/**
* Converts a Unicode code point (e.g. the number taken from an NCR) to a UTF-8 char
*
* @param integer $cp UNICODE code point
* @return string UTF-8 char
*/
function utf8_chr($cp)
{
if ($cp > 0xFFFF)
@ -694,7 +849,9 @@ function utf8_decode_ncr_callback($m)
/**
* Takes a UTF-8 string and returns an array of ints representing the
* Unicode characters.
*
* @param string UTF-8 encoded string
* @return array array of UNICODE code points
*/
function utf8_to_unicode($string)
{
@ -752,7 +909,8 @@ function utf8_to_unicode($string)
* Takes an array of ints representing the Unicode characters and returns
* a UTF-8 string.
*
* @param array of unicode code points representing a string
* @param array $array array of unicode code points representing a string
* @return string UTF-8 character string
*/
function utf8_from_unicode($array)
{