diff --git a/lib/thirdpartylibs.xml b/lib/thirdpartylibs.xml index 13a9644d774..e582b64a377 100644 --- a/lib/thirdpartylibs.xml +++ b/lib/thirdpartylibs.xml @@ -221,7 +221,7 @@ typo3 Typo3 GPL - 4.2.1 + 4.5.0 2.0+ diff --git a/lib/typo3/class.t3lib_cs.php b/lib/typo3/class.t3lib_cs.php index 7885d6327f5..0bcbfe2f8d8 100644 --- a/lib/typo3/class.t3lib_cs.php +++ b/lib/typo3/class.t3lib_cs.php @@ -1,32 +1,32 @@ + * @author Kasper Skårhøj * @author Martin Kutschker */ /** @@ -35,53 +35,53 @@ * * * 136: class t3lib_cs - * 488: function parse_charset($charset) - * 507: function get_locale_charset($locale) + * 488: function parse_charset($charset) + * 507: function get_locale_charset($locale) * - * SECTION: Charset Conversion functions - * 560: function conv($str,$fromCS,$toCS,$useEntityForNoChar=0) - * 600: function convArray(&$array,$fromCS,$toCS,$useEntityForNoChar=0) - * 617: function utf8_encode($str,$charset) - * 663: function utf8_decode($str,$charset,$useEntityForNoChar=0) - * 706: function utf8_to_entities($str) - * 739: function entities_to_utf8($str,$alsoStdHtmlEnt=0) - * 773: function utf8_to_numberarray($str,$convEntities=0,$retChar=0) - * 823: function UnumberToChar($cbyte) - * 868: function utf8CharToUnumber($str,$hex=0) + * SECTION: Charset Conversion functions + * 560: function conv($str,$fromCS,$toCS,$useEntityForNoChar=0) + * 600: function convArray(&$array,$fromCS,$toCS,$useEntityForNoChar=0) + * 617: function utf8_encode($str,$charset) + * 663: function utf8_decode($str,$charset,$useEntityForNoChar=0) + * 706: function utf8_to_entities($str) + * 739: function entities_to_utf8($str,$alsoStdHtmlEnt=0) + * 773: function utf8_to_numberarray($str,$convEntities=0,$retChar=0) + * 823: function UnumberToChar($cbyte) + * 868: function utf8CharToUnumber($str,$hex=0) * - * SECTION: Init functions - * 911: function initCharset($charset) - * 973: function initUnicodeData($mode=null) - * 1198: function initCaseFolding($charset) - * 1260: function 
initToASCII($charset) + * SECTION: Init functions + * 911: function initCharset($charset) + * 973: function initUnicodeData($mode=null) + * 1198: function initCaseFolding($charset) + * 1260: function initToASCII($charset) * - * SECTION: String operation functions - * 1331: function substr($charset,$string,$start,$len=null) - * 1384: function strlen($charset,$string) - * 1414: function crop($charset,$string,$len,$crop='') - * 1467: function strtrunc($charset,$string,$len) - * 1501: function conv_case($charset,$string,$case) - * 1527: function specCharsToASCII($charset,$string) + * SECTION: String operation functions + * 1331: function substr($charset,$string,$start,$len=null) + * 1384: function strlen($charset,$string) + * 1414: function crop($charset,$string,$len,$crop='') + * 1467: function strtrunc($charset,$string,$len) + * 1501: function conv_case($charset,$string,$case) + * 1527: function specCharsToASCII($charset,$string) * - * SECTION: Internal string operation functions - * 1567: function sb_char_mapping($str,$charset,$mode,$opt='') + * SECTION: Internal string operation functions + * 1567: function sb_char_mapping($str,$charset,$mode,$opt='') * - * SECTION: Internal UTF-8 string operation functions - * 1622: function utf8_substr($str,$start,$len=null) - * 1655: function utf8_strlen($str) - * 1676: function utf8_strtrunc($str,$len) - * 1698: function utf8_strpos($haystack,$needle,$offset=0) - * 1723: function utf8_strrpos($haystack,$needle) - * 1745: function utf8_char2byte_pos($str,$pos) - * 1786: function utf8_byte2char_pos($str,$pos) - * 1809: function utf8_char_mapping($str,$mode,$opt='') + * SECTION: Internal UTF-8 string operation functions + * 1622: function utf8_substr($str,$start,$len=null) + * 1655: function utf8_strlen($str) + * 1676: function utf8_strtrunc($str,$len) + * 1698: function utf8_strpos($haystack,$needle,$offset=0) + * 1723: function utf8_strrpos($haystack,$needle) + * 1745: function utf8_char2byte_pos($str,$pos) + * 1786: function 
utf8_byte2char_pos($str,$pos) + * 1809: function utf8_char_mapping($str,$mode,$opt='') * - * SECTION: Internal EUC string operation functions - * 1885: function euc_strtrunc($str,$len,$charset) - * 1914: function euc_substr($str,$start,$charset,$len=null) - * 1939: function euc_strlen($str,$charset) - * 1966: function euc_char2byte_pos($str,$pos,$charset) - * 2007: function euc_char_mapping($str,$charset,$mode,$opt='') + * SECTION: Internal EUC string operation functions + * 1885: function euc_strtrunc($str,$len,$charset) + * 1914: function euc_substr($str,$start,$charset,$len=null) + * 1939: function euc_strlen($str,$charset) + * 1966: function euc_char2byte_pos($str,$pos,$charset) + * 2007: function euc_char_mapping($str,$charset,$mode,$opt='') * * TOTAL FUNCTIONS: 35 * (This index is automatically created/updated by the extension "extdeveval") @@ -89,12 +89,6 @@ */ - - - - - - /** * Notes on UTF-8 * @@ -127,47 +121,47 @@ /** * Class for conversion between charsets * - * @author Kasper Skaarhoj + * @author Kasper Skårhøj * @author Martin Kutschker * @package TYPO3 * @subpackage t3lib */ class t3lib_cs { - var $noCharByteVal=63; // ASCII Value for chars with no equivalent. + var $noCharByteVal = 63; // ASCII Value for chars with no equivalent. 
// This is the array where parsed conversion tables are stored (cached) - var $parsedCharsets=array(); + var $parsedCharsets = array(); // An array where case folding data will be stored (cached) - var $caseFolding=array(); + var $caseFolding = array(); // An array where charset-to-ASCII mappings are stored (cached) - var $toASCII=array(); + var $toASCII = array(); // This tells the converter which charsets has two bytes per char: - var $twoByteSets=array( - 'ucs-2'=>1, // 2-byte Unicode + var $twoByteSets = array( + 'ucs-2' => 1, // 2-byte Unicode ); // This tells the converter which charsets has four bytes per char: - var $fourByteSets=array( - 'ucs-4'=>1, // 4-byte Unicode - 'utf-32'=>1, // 4-byte Unicode (limited to the 21-bits of UTF-16) + var $fourByteSets = array( + 'ucs-4' => 1, // 4-byte Unicode + 'utf-32' => 1, // 4-byte Unicode (limited to the 21-bits of UTF-16) ); // This tells the converter which charsets use a scheme like the Extended Unix Code: - var $eucBasedSets=array( - 'gb2312'=>1, // Chinese, simplified. - 'big5'=>1, // Chinese, traditional. - 'euc-kr'=>1, // Korean - 'shift_jis'=>1, // Japanese - WARNING: Shift-JIS includes half-width katakana single-bytes characters above 0x80! + var $eucBasedSets = array( + 'gb2312' => 1, // Chinese, simplified. + 'big5' => 1, // Chinese, traditional. + 'euc-kr' => 1, // Korean + 'shift_jis' => 1, // Japanese - WARNING: Shift-JIS includes half-width katakana single-bytes characters above 0x80! 
); // see http://developer.apple.com/documentation/macos8/TextIntlSvcs/TextEncodingConversionManager/TEC1.5/TEC.b0.html // http://czyborra.com/charsets/iso8859.html - var $synonyms=array( + var $synonyms = array( 'us' => 'ascii', - 'us-ascii'=> 'ascii', + 'us-ascii' => 'ascii', 'cp819' => 'iso-8859-1', 'ibm819' => 'iso-8859-1', 'iso-ir-100' => 'iso-8859-1', @@ -252,114 +246,117 @@ class t3lib_cs { ); // mapping of iso-639-1 language codes to script names - var $lang_to_script=array( + var $lang_to_script = array( // iso-639-1 language codes, see http://www.loc.gov/standards/iso639-2/php/code_list.php 'ar' => 'arabic', - 'bg' => 'cyrillic', // Bulgarian - 'bs' => 'east_european', // Bosnian - 'cs' => 'east_european', // Czech - 'da' => 'west_european', // Danish - 'de' => 'west_european', // German - 'es' => 'west_european', // Spanish + 'bg' => 'cyrillic', // Bulgarian + 'bs' => 'east_european', // Bosnian + 'cs' => 'east_european', // Czech + 'da' => 'west_european', // Danish + 'de' => 'west_european', // German + 'es' => 'west_european', // Spanish 'et' => 'estonian', - 'eo' => 'unicode', // Esperanto - 'eu' => 'west_european', // Basque - 'fa' => 'arabic', // Persian - 'fi' => 'west_european', // Finish - 'fo' => 'west_european', // Faroese - 'fr' => 'west_european', // French - 'ga' => 'west_european', // Galician - 'ge' => 'unicode', // Georgian + 'eo' => 'unicode', // Esperanto + 'eu' => 'west_european', // Basque + 'fa' => 'arabic', // Persian + 'fi' => 'west_european', // Finish + 'fo' => 'west_european', // Faroese + 'fr' => 'west_european', // French + 'ga' => 'west_european', // Irish + 'gl' => 'west_european', // Galician 'gr' => 'greek', - 'he' => 'hebrew', // Hebrew (since 1998) - 'hi' => 'unicode', // Hindi - 'hr' => 'east_european', // Croatian - 'hu' => 'east_european', // Hungarian - 'iw' => 'hebrew', // Hebrew (til 1998) - 'is' => 'west_european', // Icelandic - 'it' => 'west_european', // Italian + 'he' => 'hebrew', // Hebrew (since 1998) + 
'hi' => 'unicode', // Hindi + 'hr' => 'east_european', // Croatian + 'hu' => 'east_european', // Hungarian + 'iw' => 'hebrew', // Hebrew (til 1998) + 'is' => 'west_european', // Icelandic + 'it' => 'west_european', // Italian 'ja' => 'japanese', - 'kl' => 'west_european', // Greenlandic + 'ka' => 'unicode', // Georgian + 'kl' => 'west_european', // Greenlandic + 'km' => 'unicode', // Khmer 'ko' => 'korean', 'lt' => 'lithuanian', - 'lv' => 'west_european', // Latvian/Lettish - 'nl' => 'west_european', // Dutch - 'no' => 'west_european', // Norwegian - 'nb' => 'west_european', // Norwegian Bokmal - 'nn' => 'west_european', // Norwegian Nynorsk - 'pl' => 'east_european', // Polish - 'pt' => 'west_european', // Portuguese - 'ro' => 'east_european', // Romanian - 'ru' => 'cyrillic', // Russian - 'sk' => 'east_european', // Slovak - 'sl' => 'east_european', // Slovenian - 'sr' => 'cyrillic', // Serbian - 'sv' => 'west_european', // Swedish - 'sq' => 'albanian', // Albanian + 'lv' => 'west_european', // Latvian/Lettish + 'nl' => 'west_european', // Dutch + 'no' => 'west_european', // Norwegian + 'nb' => 'west_european', // Norwegian Bokmal + 'nn' => 'west_european', // Norwegian Nynorsk + 'pl' => 'east_european', // Polish + 'pt' => 'west_european', // Portuguese + 'ro' => 'east_european', // Romanian + 'ru' => 'cyrillic', // Russian + 'sk' => 'east_european', // Slovak + 'sl' => 'east_european', // Slovenian + 'sr' => 'cyrillic', // Serbian + 'sv' => 'west_european', // Swedish + 'sq' => 'albanian', // Albanian 'th' => 'thai', - 'uk' => 'cyrillic', // Ukranian + 'uk' => 'cyrillic', // Ukranian 'vi' => 'vietnamese', 'zh' => 'chinese', // MS language codes, see http://msdn.microsoft.com/library/default.asp?url=/library/en-us/vclib/html/_crt_language_strings.asp // http://msdn.microsoft.com/library/default.asp?url=/library/en-us/wceinternational5/html/wce50conLanguageIdentifiersandLocales.asp 'ara' => 'arabic', - 'bgr' => 'cyrillic', // Bulgarian - 'cat' => 'west_european', 
// Catalan + 'bgr' => 'cyrillic', // Bulgarian + 'cat' => 'west_european', // Catalan 'chs' => 'simpl_chinese', 'cht' => 'trad_chinese', - 'csy' => 'east_european', // Czech - 'dan' => 'west_european', // Danisch - 'deu' => 'west_european', // German - 'dea' => 'west_european', // German (Austrian) - 'des' => 'west_european', // German (Swiss) - 'ena' => 'west_european', // English (Australian) - 'enc' => 'west_european', // English (Canadian) - 'eng' => 'west_european', // English - 'enz' => 'west_european', // English (New Zealand) - 'enu' => 'west_european', // English (United States) - 'euq' => 'west_european', // Basque - 'fos' => 'west_european', // Faroese - 'far' => 'arabic', // Persian - 'fin' => 'west_european', // Finish - 'fra' => 'west_european', // French - 'frb' => 'west_european', // French (Belgian) - 'frc' => 'west_european', // French (Canadian) - 'frs' => 'west_european', // French (Swiss) - 'geo' => 'unicode', // Georgian - 'glg' => 'west_european', // Galician + 'csy' => 'east_european', // Czech + 'dan' => 'west_european', // Danisch + 'deu' => 'west_european', // German + 'dea' => 'west_european', // German (Austrian) + 'des' => 'west_european', // German (Swiss) + 'ena' => 'west_european', // English (Australian) + 'enc' => 'west_european', // English (Canadian) + 'eng' => 'west_european', // English + 'enz' => 'west_european', // English (New Zealand) + 'enu' => 'west_european', // English (United States) + 'euq' => 'west_european', // Basque + 'fos' => 'west_european', // Faroese + 'far' => 'arabic', // Persian + 'fin' => 'west_european', // Finish + 'fra' => 'west_european', // French + 'frb' => 'west_european', // French (Belgian) + 'frc' => 'west_european', // French (Canadian) + 'frs' => 'west_european', // French (Swiss) + 'geo' => 'unicode', // Georgian + 'glg' => 'west_european', // Galician 'ell' => 'greek', 'heb' => 'hebrew', - 'hin' => 'unicode', // Hindi - 'hun' => 'east_european', // Hungarian - 'isl' => 'west_euorpean', // 
Icelandic - 'ita' => 'west_european', // Italian - 'its' => 'west_european', // Italian (Swiss) + 'hin' => 'unicode', // Hindi + 'hun' => 'east_european', // Hungarian + 'isl' => 'west_euorpean', // Icelandic + 'ita' => 'west_european', // Italian + 'its' => 'west_european', // Italian (Swiss) 'jpn' => 'japanese', + 'khm' => 'unicode', // Khmer 'kor' => 'korean', 'lth' => 'lithuanian', - 'lvi' => 'west_european', // Latvian/Lettish - 'msl' => 'west_european', // Malay - 'nlb' => 'west_european', // Dutch (Belgian) - 'nld' => 'west_european', // Dutch - 'nor' => 'west_european', // Norwegian (bokmal) - 'non' => 'west_european', // Norwegian (nynorsk) - 'plk' => 'east_european', // Polish - 'ptg' => 'west_european', // Portuguese - 'ptb' => 'west_european', // Portuguese (Brazil) - 'rom' => 'east_european', // Romanian - 'rus' => 'cyrillic', // Russian - 'slv' => 'east_european', // Slovenian - 'sky' => 'east_european', // Slovak - 'srl' => 'east_european', // Serbian (Latin) - 'srb' => 'cyrillic', // Serbian (Cyrillic) - 'esp' => 'west_european', // Spanish (trad. sort) - 'esm' => 'west_european', // Spanish (Mexican) - 'esn' => 'west_european', // Spanish (internat. sort) - 'sve' => 'west_european', // Swedish - 'sqi' => 'albanian', // Albanian + 'lvi' => 'west_european', // Latvian/Lettish + 'msl' => 'west_european', // Malay + 'nlb' => 'west_european', // Dutch (Belgian) + 'nld' => 'west_european', // Dutch + 'nor' => 'west_european', // Norwegian (bokmal) + 'non' => 'west_european', // Norwegian (nynorsk) + 'plk' => 'east_european', // Polish + 'ptg' => 'west_european', // Portuguese + 'ptb' => 'west_european', // Portuguese (Brazil) + 'rom' => 'east_european', // Romanian + 'rus' => 'cyrillic', // Russian + 'slv' => 'east_european', // Slovenian + 'sky' => 'east_european', // Slovak + 'srl' => 'east_european', // Serbian (Latin) + 'srb' => 'cyrillic', // Serbian (Cyrillic) + 'esp' => 'west_european', // Spanish (trad. 
sort) + 'esm' => 'west_european', // Spanish (Mexican) + 'esn' => 'west_european', // Spanish (internat. sort) + 'sve' => 'west_european', // Swedish + 'sqi' => 'albanian', // Albanian 'tha' => 'thai', 'trk' => 'turkish', - 'ukr' => 'cyrillic', // Ukrainian + 'ukr' => 'cyrillic', // Ukrainian // English language names 'albanian' => 'albanian', 'arabic' => 'arabic', @@ -388,6 +385,7 @@ class t3lib_cs { 'hungarian' => 'east_european', 'icelandic' => 'west_european', 'italian' => 'west_european', + 'khmer' => 'unicode', 'latvian' => 'west_european', 'lettish' => 'west_european', 'lithuanian' => 'lithuanian', @@ -409,7 +407,7 @@ class t3lib_cs { ); // mapping of language (family) names to charsets on Unix - var $script_to_charset_unix=array( + var $script_to_charset_unix = array( 'west_european' => 'iso-8859-1', 'estonian' => 'iso-8859-1', 'east_european' => 'iso-8859-2', @@ -432,7 +430,7 @@ class t3lib_cs { ); // mapping of language (family) names to charsets on Windows - var $script_to_charset_windows=array( + var $script_to_charset_windows = array( 'east_european' => 'windows-1250', 'cyrillic' => 'windows-1251', 'west_european' => 'windows-1252', @@ -455,7 +453,7 @@ class t3lib_cs { ); // mapping of locale names to charsets - var $locale_to_charset=array( + var $locale_to_charset = array( 'japanese.euc' => 'euc-jp', 'ja_jp.ujis' => 'euc-jp', 'korean.euc' => 'euc-kr', @@ -516,6 +514,8 @@ class t3lib_cs { 'sq' => 'utf-8', 'ge' => 'utf-8', 'ga' => '', + 'km' => 'utf-8', + 'qc' => '', ); // TYPO3 specific: Array with the iso names used for each system language in TYPO3: @@ -534,7 +534,10 @@ class t3lib_cs { 'kr' => 'ko', 'ua' => 'uk', 'jp' => 'ja', + 'qc' => 'fr_CA', 'vn' => 'vi', + 'ge' => 'ka', + 'ga' => 'gl', ); /** @@ -544,9 +547,11 @@ class t3lib_cs { * @return string Normalized charset * @author Martin Kutschker */ - function parse_charset($charset) { + function parse_charset($charset) { $charset = trim(strtolower($charset)); - if 
(isset($this->synonyms[$charset])) $charset = $this->synonyms[$charset]; + if (isset($this->synonyms[$charset])) { + $charset = $this->synonyms[$charset]; + } return $charset; } @@ -554,36 +559,44 @@ class t3lib_cs { /** * Get the charset of a locale. * - * ln language - * ln_CN language / country - * ln_CN.cs language / country / charset + * ln language + * ln_CN language / country + * ln_CN.cs language / country / charset * ln_CN.cs@mod language / country / charset / modifier * * @param string Locale string * @return string Charset resolved for locale string * @author Martin Kutschker */ - function get_locale_charset($locale) { + function get_locale_charset($locale) { $locale = strtolower($locale); // exact locale specific charset? - if (isset($this->locale_to_charset[$locale])) return $this->locale_to_charset[$locale]; + if (isset($this->locale_to_charset[$locale])) { + return $this->locale_to_charset[$locale]; + } // get modifier - list($locale,$modifier) = explode('@',$locale); + list($locale, $modifier) = explode('@', $locale); // locale contains charset: use it - list($locale,$charset) = explode('.',$locale); - if ($charset) return $this->parse_charset($charset); + list($locale, $charset) = explode('.', $locale); + if ($charset) { + return $this->parse_charset($charset); + } // modifier is 'euro' (after charset check, because of xx.utf-8@euro) - if ($modifier == 'euro') return 'iso-8859-15'; + if ($modifier == 'euro') { + return 'iso-8859-15'; + } // get language - list($language,$country) = explode('_',$locale); - if (isset($this->lang_to_script[$language])) $script = $this->lang_to_script[$language]; + list($language, $country) = explode('_', $locale); + if (isset($this->lang_to_script[$language])) { + $script = $this->lang_to_script[$language]; + } - if (TYPO3_OS == 'WIN') { + if (TYPO3_OS == 'WIN') { $cs = $this->script_to_charset_windows[$script] ? 
$this->script_to_charset_windows[$script] : 'windows-1252'; } else { $cs = $this->script_to_charset_unix[$script] ? $this->script_to_charset_unix[$script] : 'iso-8859-1'; @@ -593,13 +606,6 @@ class t3lib_cs { } - - - - - - - /******************************************** * * Charset Conversion functions @@ -616,37 +622,49 @@ class t3lib_cs { * @return string Converted string * @see convArray() */ - function conv($str,$fromCS,$toCS,$useEntityForNoChar=0) { - if ($fromCS==$toCS) return $str; + function conv($str, $fromCS, $toCS, $useEntityForNoChar = 0) { + if ($fromCS == $toCS) { + return $str; + } // PHP-libs don't support fallback to SGML entities, but UTF-8 handles everything - if ($toCS=='utf-8' || !$useEntityForNoChar) { - switch($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_convMethod']) { - case 'mbstring': - $conv_str = mb_convert_encoding($str,$toCS,$fromCS); - if (false !== $conv_str) return $conv_str; // returns false for unsupported charsets - break; + if ($toCS == 'utf-8' || !$useEntityForNoChar) { + switch ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_convMethod']) { + case 'mbstring': + $conv_str = mb_convert_encoding($str, $toCS, $fromCS); + if (FALSE !== $conv_str) { + return $conv_str; + } // returns false for unsupported charsets + break; - case 'iconv': - $conv_str = iconv($fromCS,$toCS.'//TRANSLIT',$str); - if (false !== $conv_str) return $conv_str; - break; + case 'iconv': + $conv_str = iconv($fromCS, $toCS . '//TRANSLIT', $str); + if (FALSE !== $conv_str) { + return $conv_str; + } + break; - case 'recode': - $conv_str = recode_string($fromCS.'..'.$toCS,$str); - if (false !== $conv_str) return $conv_str; - break; + case 'recode': + $conv_str = recode_string($fromCS . '..' . 
$toCS, $str); + if (FALSE !== $conv_str) { + return $conv_str; + } + break; } // fallback to TYPO3 conversion } - if ($fromCS!='utf-8') $str=$this->utf8_encode($str,$fromCS); - if ($toCS!='utf-8') $str=$this->utf8_decode($str,$toCS,$useEntityForNoChar); + if ($fromCS != 'utf-8') { + $str = $this->utf8_encode($str, $fromCS); + } + if ($toCS != 'utf-8') { + $str = $this->utf8_decode($str, $toCS, $useEntityForNoChar); + } return $str; } /** - * Convert all elements in ARRAY from one charset to another charset. + * Convert all elements in ARRAY with type string from one charset to another charset. * NOTICE: Array is passed by reference! * * @param string Input array, possibly multidimensional @@ -656,12 +674,12 @@ class t3lib_cs { * @return void * @see conv() */ - function convArray(&$array,$fromCS,$toCS,$useEntityForNoChar=0) { - foreach($array as $key => $value) { - if (is_array($array[$key])) { - $this->convArray($array[$key],$fromCS,$toCS,$useEntityForNoChar); - } else { - $array[$key] = $this->conv($array[$key],$fromCS,$toCS,$useEntityForNoChar); + function convArray(&$array, $fromCS, $toCS, $useEntityForNoChar = 0) { + foreach ($array as $key => $value) { + if (is_array($array[$key])) { + $this->convArray($array[$key], $fromCS, $toCS, $useEntityForNoChar); + } elseif (is_string($array[$key])) { + $array[$key] = $this->conv($array[$key], $fromCS, $toCS, $useEntityForNoChar); } } } @@ -673,39 +691,47 @@ class t3lib_cs { * @param string Charset, lowercase. Must be found in csconvtbl/ folder. * @return string Output string, converted to UTF-8 */ - function utf8_encode($str,$charset) { + function utf8_encode($str, $charset) { - if ($charset === 'utf-8') return $str; + if ($charset === 'utf-8') { + return $str; + } // Charset is case-insensitive. - if ($this->initCharset($charset)) { // Parse conv. table if not already... + if ($this->initCharset($charset)) { // Parse conv. table if not already... 
$strLen = strlen($str); - $outStr=''; + $outStr = ''; - for ($a=0;$a<$strLen;$a++) { // Traverse each char in string. - $chr=substr($str,$a,1); - $ord=ord($chr); - if (isset($this->twoByteSets[$charset])) { // If the charset has two bytes per char - $ord2 = ord($str{$a+1}); - $ord = $ord<<8 | $ord2; // assume big endian + for ($a = 0; $a < $strLen; $a++) { // Traverse each char in string. + $chr = substr($str, $a, 1); + $ord = ord($chr); + if (isset($this->twoByteSets[$charset])) { // If the charset has two bytes per char + $ord2 = ord($str{$a + 1}); + $ord = $ord << 8 | $ord2; // assume big endian - if (isset($this->parsedCharsets[$charset]['local'][$ord])) { // If the local char-number was found in parsed conv. table then we use that, otherwise 127 (no char?) - $outStr.=$this->parsedCharsets[$charset]['local'][$ord]; - } else $outStr.=chr($this->noCharByteVal); // No char exists + if (isset($this->parsedCharsets[$charset]['local'][$ord])) { // If the local char-number was found in parsed conv. table then we use that, otherwise 127 (no char?) + $outStr .= $this->parsedCharsets[$charset]['local'][$ord]; + } else { + $outStr .= chr($this->noCharByteVal); + } // No char exists $a++; - } elseif ($ord>127) { // If char has value over 127 it's a multibyte char in UTF-8 - if (isset($this->eucBasedSets[$charset])) { // EUC uses two-bytes above 127; we get both and advance pointer and make $ord a 16bit int. - if ($charset != 'shift_jis' || ($ord < 0xA0 || $ord > 0xDF)) { // Shift-JIS: chars between 160 and 223 are single byte + } elseif ($ord > 127) { // If char has value over 127 it's a multibyte char in UTF-8 + if (isset($this->eucBasedSets[$charset])) { // EUC uses two-bytes above 127; we get both and advance pointer and make $ord a 16bit int. 
+ if ($charset != 'shift_jis' || ($ord < 0xA0 || $ord > 0xDF)) { // Shift-JIS: chars between 160 and 223 are single byte $a++; - $ord2=ord(substr($str,$a,1)); - $ord = $ord*256+$ord2; + $ord2 = ord(substr($str, $a, 1)); + $ord = $ord * 256 + $ord2; } } - if (isset($this->parsedCharsets[$charset]['local'][$ord])) { // If the local char-number was found in parsed conv. table then we use that, otherwise 127 (no char?) - $outStr.= $this->parsedCharsets[$charset]['local'][$ord]; - } else $outStr.= chr($this->noCharByteVal); // No char exists - } else $outStr.= $chr; // ... otherwise it's just ASCII 0-127 and one byte. Transparent + if (isset($this->parsedCharsets[$charset]['local'][$ord])) { // If the local char-number was found in parsed conv. table then we use that, otherwise 127 (no char?) + $outStr .= $this->parsedCharsets[$charset]['local'][$ord]; + } else { + $outStr .= chr($this->noCharByteVal); + } // No char exists + } else { + $outStr .= $chr; + } // ... otherwise it's just ASCII 0-127 and one byte. Transparent } return $outStr; } @@ -719,42 +745,52 @@ class t3lib_cs { * @param boolean If set, then characters that are not available in the destination character set will be encoded as numeric entities * @return string Output string, converted to local charset */ - function utf8_decode($str,$charset,$useEntityForNoChar=0) { + function utf8_decode($str, $charset, $useEntityForNoChar = 0) { if ($charset === 'utf-8') { return $str; } // Charset is case-insensitive. - if ($this->initCharset($charset)) { // Parse conv. table if not already... + if ($this->initCharset($charset)) { // Parse conv. table if not already... $strLen = strlen($str); - $outStr=''; - $buf=''; - for ($a=0,$i=0;$a<$strLen;$a++,$i++) { // Traverse each char in UTF-8 string. - $chr=substr($str,$a,1); - $ord=ord($chr); - if ($ord>127) { // This means multibyte! (first byte!) - if ($ord & 64) { // Since the first byte must have the 7th bit set we check that. 
Otherwise we might be in the middle of a byte sequence. + $outStr = ''; + $buf = ''; + for ($a = 0, $i = 0; $a < $strLen; $a++, $i++) { // Traverse each char in UTF-8 string. + $chr = substr($str, $a, 1); + $ord = ord($chr); + if ($ord > 127) { // This means multibyte! (first byte!) + if ($ord & 64) { // Since the first byte must have the 7th bit set we check that. Otherwise we might be in the middle of a byte sequence. - $buf=$chr; // Add first byte - for ($b=0;$b<8;$b++) { // for each byte in multibyte string... - $ord = $ord << 1; // Shift it left and ... - if ($ord & 128) { // ... and with 8th bit - if that is set, then there are still bytes in sequence. - $a++; // Increase pointer... - $buf.=substr($str,$a,1); // ... and add the next char. - } else break; + $buf = $chr; // Add first byte + for ($b = 0; $b < 8; $b++) { // for each byte in multibyte string... + $ord = $ord << 1; // Shift it left and ... + if ($ord & 128) { // ... and with 8th bit - if that is set, then there are still bytes in sequence. + $a++; // Increase pointer... + $buf .= substr($str, $a, 1); // ... and add the next char. + } else { + break; + } } - if (isset($this->parsedCharsets[$charset]['utf8'][$buf])) { // If the UTF-8 char-sequence is found then... - $mByte = $this->parsedCharsets[$charset]['utf8'][$buf]; // The local number - if ($mByte>255) { // If the local number is greater than 255 we will need to split the byte (16bit word assumed) in two chars. - $outStr.= chr(($mByte >> 8) & 255).chr($mByte & 255); - } else $outStr.= chr($mByte); - } elseif ($useEntityForNoChar) { // Create num entity: - $outStr.='&#'.$this->utf8CharToUnumber($buf,1).';'; - } else $outStr.=chr($this->noCharByteVal); // No char exists - } else $outStr.=chr($this->noCharByteVal); // No char exists (MIDDLE of MB sequence!) - } else $outStr.=$chr; // ... otherwise it's just ASCII 0-127 and one byte. 
Transparent + if (isset($this->parsedCharsets[$charset]['utf8'][$buf])) { // If the UTF-8 char-sequence is found then... + $mByte = $this->parsedCharsets[$charset]['utf8'][$buf]; // The local number + if ($mByte > 255) { // If the local number is greater than 255 we will need to split the byte (16bit word assumed) in two chars. + $outStr .= chr(($mByte >> 8) & 255) . chr($mByte & 255); + } else { + $outStr .= chr($mByte); + } + } elseif ($useEntityForNoChar) { // Create num entity: + $outStr .= '&#' . $this->utf8CharToUnumber($buf, 1) . ';'; + } else { + $outStr .= chr($this->noCharByteVal); + } // No char exists + } else { + $outStr .= chr($this->noCharByteVal); + } // No char exists (MIDDLE of MB sequence!) + } else { + $outStr .= $chr; + } // ... otherwise it's just ASCII 0-127 and one byte. Transparent } return $outStr; } @@ -766,27 +802,33 @@ class t3lib_cs { * @param string Input string * @return string Output string */ - function utf8_to_entities($str) { + function utf8_to_entities($str) { $strLen = strlen($str); - $outStr=''; - $buf=''; - for ($a=0;$a<$strLen;$a++) { // Traverse each char in UTF-8 string. - $chr=substr($str,$a,1); - $ord=ord($chr); - if ($ord>127) { // This means multibyte! (first byte!) - if ($ord & 64) { // Since the first byte must have the 7th bit set we check that. Otherwise we might be in the middle of a byte sequence. - $buf=$chr; // Add first byte - for ($b=0;$b<8;$b++) { // for each byte in multibyte string... - $ord = $ord << 1; // Shift it left and ... - if ($ord & 128) { // ... and with 8th bit - if that is set, then there are still bytes in sequence. - $a++; // Increase pointer... - $buf.=substr($str,$a,1); // ... and add the next char. - } else break; + $outStr = ''; + $buf = ''; + for ($a = 0; $a < $strLen; $a++) { // Traverse each char in UTF-8 string. + $chr = substr($str, $a, 1); + $ord = ord($chr); + if ($ord > 127) { // This means multibyte! (first byte!) 
+ if ($ord & 64) { // Since the first byte must have the 7th bit set we check that. Otherwise we might be in the middle of a byte sequence. + $buf = $chr; // Add first byte + for ($b = 0; $b < 8; $b++) { // for each byte in multibyte string... + $ord = $ord << 1; // Shift it left and ... + if ($ord & 128) { // ... and with 8th bit - if that is set, then there are still bytes in sequence. + $a++; // Increase pointer... + $buf .= substr($str, $a, 1); // ... and add the next char. + } else { + break; + } } - $outStr.='&#'.$this->utf8CharToUnumber($buf,1).';'; - } else $outStr.=chr($this->noCharByteVal); // No char exists (MIDDLE of MB sequence!) - } else $outStr.=$chr; // ... otherwise it's just ASCII 0-127 and one byte. Transparent + $outStr .= '&#' . $this->utf8CharToUnumber($buf, 1) . ';'; + } else { + $outStr .= chr($this->noCharByteVal); + } // No char exists (MIDDLE of MB sequence!) + } else { + $outStr .= $chr; + } // ... otherwise it's just ASCII 0-127 and one byte. Transparent } return $outStr; @@ -799,30 +841,30 @@ class t3lib_cs { * @param boolean If set, then all string-HTML entities (like & or £ will be converted as well) * @return string Output string */ - function entities_to_utf8($str,$alsoStdHtmlEnt=0) { - if ($alsoStdHtmlEnt) { - $trans_tbl = array_flip(get_html_translation_table(HTML_ENTITIES)); // Getting them in iso-8859-1 - but thats ok since this is observed below. + function entities_to_utf8($str, $alsoStdHtmlEnt = 0) { + if ($alsoStdHtmlEnt) { + $trans_tbl = array_flip(get_html_translation_table(HTML_ENTITIES)); // Getting them in iso-8859-1 - but thats ok since this is observed below. } $token = md5(microtime()); $parts = explode($token, preg_replace('/(&([#[:alnum:]]*);)/', $token . '${2}' . 
$token, $str)); - foreach($parts as $k => $v) { - if ($k%2) { - if (substr($v,0,1)=='#') { // Dec or hex entities: - if (substr($v,1,1)=='x') { - $parts[$k] = $this->UnumberToChar(hexdec(substr($v,2))); + foreach ($parts as $k => $v) { + if ($k % 2) { + if (substr($v, 0, 1) == '#') { // Dec or hex entities: + if (substr($v, 1, 1) == 'x') { + $parts[$k] = $this->UnumberToChar(hexdec(substr($v, 2))); } else { - $parts[$k] = $this->UnumberToChar(substr($v,1)); + $parts[$k] = $this->UnumberToChar(substr($v, 1)); } - } elseif ($alsoStdHtmlEnt && $trans_tbl['&'.$v.';']) { // Other entities: - $parts[$k] = $this->utf8_encode($trans_tbl['&'.$v.';'],'iso-8859-1'); - } else { // No conversion: - $parts[$k] ='&'.$v.';'; + } elseif ($alsoStdHtmlEnt && $trans_tbl['&' . $v . ';']) { // Other entities: + $parts[$k] = $this->utf8_encode($trans_tbl['&' . $v . ';'], 'iso-8859-1'); + } else { // No conversion: + $parts[$k] = '&' . $v . ';'; } } } - return implode('',$parts); + return implode('', $parts); } /** @@ -833,32 +875,38 @@ class t3lib_cs { * @param boolean If set, then instead of integer numbers the real UTF-8 char is returned. * @return array Output array with the char numbers */ - function utf8_to_numberarray($str,$convEntities=0,$retChar=0) { + function utf8_to_numberarray($str, $convEntities = 0, $retChar = 0) { // If entities must be registered as well...: - if ($convEntities) { - $str = $this->entities_to_utf8($str,1); + if ($convEntities) { + $str = $this->entities_to_utf8($str, 1); } // Do conversion: $strLen = strlen($str); - $outArr=array(); - $buf=''; - for ($a=0;$a<$strLen;$a++) { // Traverse each char in UTF-8 string. - $chr=substr($str,$a,1); - $ord=ord($chr); - if ($ord>127) { // This means multibyte! (first byte!) - if ($ord & 64) { // Since the first byte must have the 7th bit set we check that. Otherwise we might be in the middle of a byte sequence. - $buf=$chr; // Add first byte - for ($b=0;$b<8;$b++) { // for each byte in multibyte string... 
- $ord = $ord << 1; // Shift it left and ... - if ($ord & 128) { // ... and with 8th bit - if that is set, then there are still bytes in sequence. - $a++; // Increase pointer... - $buf.=substr($str,$a,1); // ... and add the next char. - } else break; + $outArr = array(); + $buf = ''; + for ($a = 0; $a < $strLen; $a++) { // Traverse each char in UTF-8 string. + $chr = substr($str, $a, 1); + $ord = ord($chr); + if ($ord > 127) { // This means multibyte! (first byte!) + if ($ord & 64) { // Since the first byte must have the 7th bit set we check that. Otherwise we might be in the middle of a byte sequence. + $buf = $chr; // Add first byte + for ($b = 0; $b < 8; $b++) { // for each byte in multibyte string... + $ord = $ord << 1; // Shift it left and ... + if ($ord & 128) { // ... and with 8th bit - if that is set, then there are still bytes in sequence. + $a++; // Increase pointer... + $buf .= substr($str, $a, 1); // ... and add the next char. + } else { + break; + } } - $outArr[]=$retChar?$buf:$this->utf8CharToUnumber($buf); - } else $outArr[]=$retChar?chr($this->noCharByteVal):$this->noCharByteVal; // No char exists (MIDDLE of MB sequence!) - } else $outArr[]=$retChar?chr($ord):$ord; // ... otherwise it's just ASCII 0-127 and one byte. Transparent + $outArr[] = $retChar ? $buf : $this->utf8CharToUnumber($buf); + } else { + $outArr[] = $retChar ? chr($this->noCharByteVal) : $this->noCharByteVal; + } // No char exists (MIDDLE of MB sequence!) + } else { + $outArr[] = $retChar ? chr($ord) : $ord; + } // ... otherwise it's just ASCII 0-127 and one byte. 
Transparent } return $outArr; @@ -872,49 +920,59 @@ class t3lib_cs { * The binary representation of the character's integer value is thus simply spread across the bytes and the number of high bits set in the lead byte announces the number of bytes in the multibyte sequence: * * bytes | bits | representation - * 1 | 7 | 0vvvvvvv - * 2 | 11 | 110vvvvv 10vvvvvv - * 3 | 16 | 1110vvvv 10vvvvvv 10vvvvvv - * 4 | 21 | 11110vvv 10vvvvvv 10vvvvvv 10vvvvvv - * 5 | 26 | 111110vv 10vvvvvv 10vvvvvv 10vvvvvv 10vvvvvv - * 6 | 31 | 1111110v 10vvvvvv 10vvvvvv 10vvvvvv 10vvvvvv 10vvvvvv + * 1 | 7 | 0vvvvvvv + * 2 | 11 | 110vvvvv 10vvvvvv + * 3 | 16 | 1110vvvv 10vvvvvv 10vvvvvv + * 4 | 21 | 11110vvv 10vvvvvv 10vvvvvv 10vvvvvv + * 5 | 26 | 111110vv 10vvvvvv 10vvvvvv 10vvvvvv 10vvvvvv + * 6 | 31 | 1111110v 10vvvvvv 10vvvvvv 10vvvvvv 10vvvvvv 10vvvvvv * * @param integer UNICODE integer * @return string UTF-8 multibyte character string * @see utf8CharToUnumber() */ - function UnumberToChar($cbyte) { - $str=''; + function UnumberToChar($cbyte) { + $str = ''; if ($cbyte < 0x80) { - $str.=chr($cbyte); - } else if ($cbyte < 0x800) { - $str.=chr(0xC0 | ($cbyte >> 6)); - $str.=chr(0x80 | ($cbyte & 0x3F)); - } else if ($cbyte < 0x10000) { - $str.=chr(0xE0 | ($cbyte >> 12)); - $str.=chr(0x80 | (($cbyte >> 6) & 0x3F)); - $str.=chr(0x80 | ($cbyte & 0x3F)); - } else if ($cbyte < 0x200000) { - $str.=chr(0xF0 | ($cbyte >> 18)); - $str.=chr(0x80 | (($cbyte >> 12) & 0x3F)); - $str.=chr(0x80 | (($cbyte >> 6) & 0x3F)); - $str.=chr(0x80 | ($cbyte & 0x3F)); - } else if ($cbyte < 0x4000000) { - $str.=chr(0xF8 | ($cbyte >> 24)); - $str.=chr(0x80 | (($cbyte >> 18) & 0x3F)); - $str.=chr(0x80 | (($cbyte >> 12) & 0x3F)); - $str.=chr(0x80 | (($cbyte >> 6) & 0x3F)); - $str.=chr(0x80 | ($cbyte & 0x3F)); - } else if ($cbyte < 0x80000000) { - $str.=chr(0xFC | ($cbyte >> 30)); - $str.=chr(0x80 | (($cbyte >> 24) & 0x3F)); - $str.=chr(0x80 | (($cbyte >> 18) & 0x3F)); - $str.=chr(0x80 | (($cbyte >> 12) & 0x3F)); - 
$str.=chr(0x80 | (($cbyte >> 6) & 0x3F)); - $str.=chr(0x80 | ($cbyte & 0x3F)); - } else { // Cannot express a 32-bit character in UTF-8 - $str .= chr($this->noCharByteVal); + $str .= chr($cbyte); + } else { + if ($cbyte < 0x800) { + $str .= chr(0xC0 | ($cbyte >> 6)); + $str .= chr(0x80 | ($cbyte & 0x3F)); + } else { + if ($cbyte < 0x10000) { + $str .= chr(0xE0 | ($cbyte >> 12)); + $str .= chr(0x80 | (($cbyte >> 6) & 0x3F)); + $str .= chr(0x80 | ($cbyte & 0x3F)); + } else { + if ($cbyte < 0x200000) { + $str .= chr(0xF0 | ($cbyte >> 18)); + $str .= chr(0x80 | (($cbyte >> 12) & 0x3F)); + $str .= chr(0x80 | (($cbyte >> 6) & 0x3F)); + $str .= chr(0x80 | ($cbyte & 0x3F)); + } else { + if ($cbyte < 0x4000000) { + $str .= chr(0xF8 | ($cbyte >> 24)); + $str .= chr(0x80 | (($cbyte >> 18) & 0x3F)); + $str .= chr(0x80 | (($cbyte >> 12) & 0x3F)); + $str .= chr(0x80 | (($cbyte >> 6) & 0x3F)); + $str .= chr(0x80 | ($cbyte & 0x3F)); + } else { + if ($cbyte < 0x80000000) { + $str .= chr(0xFC | ($cbyte >> 30)); + $str .= chr(0x80 | (($cbyte >> 24) & 0x3F)); + $str .= chr(0x80 | (($cbyte >> 18) & 0x3F)); + $str .= chr(0x80 | (($cbyte >> 12) & 0x3F)); + $str .= chr(0x80 | (($cbyte >> 6) & 0x3F)); + $str .= chr(0x80 | ($cbyte & 0x3F)); + } else { // Cannot express a 32-bit character in UTF-8 + $str .= chr($this->noCharByteVal); + } + } + } + } + } } return $str; } @@ -928,33 +986,30 @@ class t3lib_cs { * @return integer UNICODE integer * @see UnumberToChar() */ - function utf8CharToUnumber($str,$hex=0) { - $ord=ord(substr($str,0,1)); // First char + function utf8CharToUnumber($str, $hex = 0) { + $ord = ord(substr($str, 0, 1)); // First char - if (($ord & 192) == 192) { // This verifyes that it IS a multi byte string - $binBuf=''; - for ($b=0;$b<8;$b++) { // for each byte in multibyte string... - $ord = $ord << 1; // Shift it left and ... - if ($ord & 128) { // ... and with 8th bit - if that is set, then there are still bytes in sequence. 
- $binBuf.=substr('00000000'.decbin(ord(substr($str,$b+1,1))),-6); - } else break; + if (($ord & 192) == 192) { // This verifyes that it IS a multi byte string + $binBuf = ''; + for ($b = 0; $b < 8; $b++) { // for each byte in multibyte string... + $ord = $ord << 1; // Shift it left and ... + if ($ord & 128) { // ... and with 8th bit - if that is set, then there are still bytes in sequence. + $binBuf .= substr('00000000' . decbin(ord(substr($str, $b + 1, 1))), -6); + } else { + break; + } } - $binBuf=substr('00000000'.decbin(ord(substr($str,0,1))),-(6-$b)).$binBuf; + $binBuf = substr('00000000' . decbin(ord(substr($str, 0, 1))), -(6 - $b)) . $binBuf; $int = bindec($binBuf); - } else $int = $ord; + } else { + $int = $ord; + } - return $hex ? 'x'.dechex($int) : $int; + return $hex ? 'x' . dechex($int) : $int; } - - - - - - - /******************************************** * * Init functions @@ -971,57 +1026,63 @@ class t3lib_cs { * @return integer Returns '1' if already loaded. Returns FALSE if charset conversion table was not found. Returns '2' if the charset conversion table was found and parsed. * @access private */ - function initCharset($charset) { + function initCharset($charset) { // Only process if the charset is not yet loaded: - if (empty($this->parsedCharsets[$charset]) || !is_array($this->parsedCharsets[$charset])) { + if (empty($this->parsedCharsets[$charset]) || !is_array($this->parsedCharsets[$charset])) { // Conversion table filename: - $charsetConvTableFile = PATH_t3lib.'csconvtbl/'.$charset.'.tbl'; + $charsetConvTableFile = PATH_t3lib . 'csconvtbl/' . $charset . '.tbl'; // If the conversion table is found: - if ($charset && t3lib_div::validPathStr($charsetConvTableFile) && @is_file($charsetConvTableFile)) { + if ($charset && t3lib_div::validPathStr($charsetConvTableFile) && @is_file($charsetConvTableFile)) { // Cache file for charsets: // Caching brought parsing time for gb2312 down from 2400 ms to 150 ms. 
For other charsets we are talking 11 ms down to zero. - $cacheFile = t3lib_div::getFileAbsFileName('typo3temp/cs/charset_'.$charset.'.tbl'); - if ($cacheFile && @is_file($cacheFile)) { - $this->parsedCharsets[$charset]=unserialize(t3lib_div::getUrl($cacheFile)); + $cacheFile = t3lib_div::getFileAbsFileName('typo3temp/cs/charset_' . $charset . '.tbl'); + if ($cacheFile && @is_file($cacheFile)) { + $this->parsedCharsets[$charset] = unserialize(t3lib_div::getUrl($cacheFile)); } else { // Parse conversion table into lines: - $lines=t3lib_div::trimExplode(chr(10),t3lib_div::getUrl($charsetConvTableFile),1); + $lines = t3lib_div::trimExplode(LF, t3lib_div::getUrl($charsetConvTableFile), 1); // Initialize the internal variable holding the conv. table: - $this->parsedCharsets[$charset]=array('local'=>array(),'utf8'=>array()); + $this->parsedCharsets[$charset] = array('local' => array(), 'utf8' => array()); // traverse the lines: - $detectedType=''; - foreach($lines as $value) { - if (trim($value) && substr($value,0,1)!='#') { // Comment line or blanks are ignored. + $detectedType = ''; + foreach ($lines as $value) { + if (trim($value) && substr($value, 0, 1) != '#') { // Comment line or blanks are ignored. // Detect type if not done yet: (Done on first real line) // The "whitespaced" type is on the syntax "0x0A 0x000A #LINE FEED" while "ms-token" is like "B9 = U+00B9 : SUPERSCRIPT ONE" - if (!$detectedType) $detectedType = preg_match('/[[:space:]]*0x([[:alnum:]]*)[[:space:]]+0x([[:alnum:]]*)[[:space:]]+/',$value) ? 'whitespaced' : 'ms-token'; + if (!$detectedType) { + $detectedType = preg_match('/[[:space:]]*0x([[:alnum:]]*)[[:space:]]+0x([[:alnum:]]*)[[:space:]]+/', $value) ? 
'whitespaced' : 'ms-token'; + } - if ($detectedType=='ms-token') { + if ($detectedType == 'ms-token') { list($hexbyte, $utf8) = preg_split('/[=:]/', $value, 3); - } elseif ($detectedType=='whitespaced') { - $regA=array(); - preg_match('/[[:space:]]*0x([[:alnum:]]*)[[:space:]]+0x([[:alnum:]]*)[[:space:]]+/',$value,$regA); + } elseif ($detectedType == 'whitespaced') { + $regA = array(); + preg_match('/[[:space:]]*0x([[:alnum:]]*)[[:space:]]+0x([[:alnum:]]*)[[:space:]]+/', $value, $regA); $hexbyte = $regA[1]; - $utf8 = 'U+'.$regA[2]; + $utf8 = 'U+' . $regA[2]; } $decval = hexdec(trim($hexbyte)); - if ($decval>127) { - $utf8decval = hexdec(substr(trim($utf8),2)); - $this->parsedCharsets[$charset]['local'][$decval]=$this->UnumberToChar($utf8decval); - $this->parsedCharsets[$charset]['utf8'][$this->parsedCharsets[$charset]['local'][$decval]]=$decval; + if ($decval > 127) { + $utf8decval = hexdec(substr(trim($utf8), 2)); + $this->parsedCharsets[$charset]['local'][$decval] = $this->UnumberToChar($utf8decval); + $this->parsedCharsets[$charset]['utf8'][$this->parsedCharsets[$charset]['local'][$decval]] = $decval; } } } - if ($cacheFile) { - t3lib_div::writeFileToTypo3tempDir($cacheFile,serialize($this->parsedCharsets[$charset])); + if ($cacheFile) { + t3lib_div::writeFileToTypo3tempDir($cacheFile, serialize($this->parsedCharsets[$charset])); } } return 2; - } else return false; - } else return 1; + } else { + return FALSE; + } + } else { + return 1; + } } /** @@ -1033,28 +1094,32 @@ class t3lib_cs { * @return integer Returns FALSE on error, a TRUE value on success: 1 table already loaded, 2, cached version, 3 table parsed (and cached). 
* @access private */ - function initUnicodeData($mode=null) { + function initUnicodeData($mode = NULL) { // cache files $cacheFileCase = t3lib_div::getFileAbsFileName('typo3temp/cs/cscase_utf-8.tbl'); $cacheFileASCII = t3lib_div::getFileAbsFileName('typo3temp/cs/csascii_utf-8.tbl'); // Only process if the tables are not yet loaded - switch($mode) { + switch ($mode) { case 'case': - if (is_array($this->caseFolding['utf-8'])) return 1; + if (is_array($this->caseFolding['utf-8'])) { + return 1; + } // Use cached version if possible - if ($cacheFileCase && @is_file($cacheFileCase)) { + if ($cacheFileCase && @is_file($cacheFileCase)) { $this->caseFolding['utf-8'] = unserialize(t3lib_div::getUrl($cacheFileCase)); return 2; } break; case 'ascii': - if (is_array($this->toASCII['utf-8'])) return 1; + if (is_array($this->toASCII['utf-8'])) { + return 1; + } // Use cached version if possible - if ($cacheFileASCII && @is_file($cacheFileASCII)) { + if ($cacheFileASCII && @is_file($cacheFileASCII)) { $this->toASCII['utf-8'] = unserialize(t3lib_div::getUrl($cacheFileASCII)); return 2; } @@ -1062,11 +1127,15 @@ class t3lib_cs { } // process main Unicode data file - $unicodeDataFile = PATH_t3lib.'unidata/UnicodeData.txt'; - if (!(t3lib_div::validPathStr($unicodeDataFile) && @is_file($unicodeDataFile))) return false; + $unicodeDataFile = PATH_t3lib . 'unidata/UnicodeData.txt'; + if (!(t3lib_div::validPathStr($unicodeDataFile) && @is_file($unicodeDataFile))) { + return FALSE; + } - $fh = fopen($unicodeDataFile,'rb'); - if (!$fh) return false; + $fh = fopen($unicodeDataFile, 'rb'); + if (!$fh) { + return FALSE; + } // key = utf8 char (single codepoint), value = utf8 string (codepoint sequence) // note: we use the UTF-8 characters here and not the Unicode numbers to avoid conversion roundtrip in utf8_strtolower/-upper) @@ -1076,58 +1145,72 @@ class t3lib_cs { $utf8CaseFolding['toLower'] = array(); $utf8CaseFolding['toTitle'] = array(); - $decomposition = array(); // array of temp. 
decompositions - $mark = array(); // array of chars that are marks (eg. composing accents) - $number = array(); // array of chars that are numbers (eg. digits) - $omit = array(); // array of chars to be omitted (eg. Russian hard sign) + $decomposition = array(); // array of temp. decompositions + $mark = array(); // array of chars that are marks (eg. composing accents) + $number = array(); // array of chars that are numbers (eg. digits) + $omit = array(); // array of chars to be omitted (eg. Russian hard sign) - while (!feof($fh)) { - $line = fgets($fh,4096); + while (!feof($fh)) { + $line = fgets($fh, 4096); // has a lot of info - list($char,$name,$cat,,,$decomp,,,$num,,,,$upper,$lower,$title,) = explode(';', rtrim($line)); + list($char, $name, $cat, , , $decomp, , , $num, , , , $upper, $lower, $title,) = explode(';', rtrim($line)); $ord = hexdec($char); - if ($ord > 0xFFFF) break; // only process the BMP + if ($ord > 0xFFFF) { + break; + } // only process the BMP $utf8_char = $this->UnumberToChar($ord); - if ($upper) $utf8CaseFolding['toUpper'][$utf8_char] = $this->UnumberToChar(hexdec($upper)); - if ($lower) $utf8CaseFolding['toLower'][$utf8_char] = $this->UnumberToChar(hexdec($lower)); + if ($upper) { + $utf8CaseFolding['toUpper'][$utf8_char] = $this->UnumberToChar(hexdec($upper)); + } + if ($lower) { + $utf8CaseFolding['toLower'][$utf8_char] = $this->UnumberToChar(hexdec($lower)); + } // store "title" only when different from "upper" (only a few) - if ($title && $title != $upper) $utf8CaseFolding['toTitle'][$utf8_char] = $this->UnumberToChar(hexdec($title)); + if ($title && $title != $upper) { + $utf8CaseFolding['toTitle'][$utf8_char] = $this->UnumberToChar(hexdec($title)); + } - switch ($cat{0}) { - case 'M': // mark (accent, umlaut, ...) + switch ($cat{0}) { + case 'M': // mark (accent, umlaut, ...) 
$mark["U+$char"] = 1; break; - case 'N': // numeric value - if ($ord > 0x80 && $num != '') $number["U+$char"] = $num; + case 'N': // numeric value + if ($ord > 0x80 && $num != '') { + $number["U+$char"] = $num; + } } // accented Latin letters without "official" decomposition $match = array(); - if (preg_match('/^LATIN (SMALL|CAPITAL) LETTER ([A-Z]) WITH/',$name,$match) && !$decomp) { + if (preg_match('/^LATIN (SMALL|CAPITAL) LETTER ([A-Z]) WITH/', $name, $match) && !$decomp) { $c = ord($match[2]); - if ($match[1] == 'SMALL') $c += 32; + if ($match[1] == 'SMALL') { + $c += 32; + } $decomposition["U+$char"] = array(dechex($c)); continue; } $match = array(); - if (preg_match('/(<.*>)? *(.+)/',$decomp,$match)) { - switch($match[1]) { - case '': // add parenthesis as circle replacement, eg (1) - $match[2] = '0028 '.$match[2].' 0029'; + if (preg_match('/(<.*>)? *(.+)/', $decomp, $match)) { + switch ($match[1]) { + case '': // add parenthesis as circle replacement, eg (1) + $match[2] = '0028 ' . $match[2] . ' 0029'; break; - case '': // add square brackets as square replacement, eg [1] - $match[2] = '005B '.$match[2].' 005D'; + case '': // add square brackets as square replacement, eg [1] + $match[2] = '005B ' . $match[2] . 
' 005D'; break; - case '': // ignore multi char decompositions that start with a space - if (preg_match('/^0020 /',$match[2])) continue 2; + case '': // ignore multi char decompositions that start with a space + if (preg_match('/^0020 /', $match[2])) { + continue 2; + } break; // ignore Arabic and vertical layout presentation decomposition @@ -1144,31 +1227,31 @@ class t3lib_cs { fclose($fh); // process additional Unicode data for casing (allow folded characters to expand into a sequence) - $specialCasingFile = PATH_t3lib.'unidata/SpecialCasing.txt'; - if (t3lib_div::validPathStr($specialCasingFile) && @is_file($specialCasingFile)) { - $fh = fopen($specialCasingFile,'rb'); - if ($fh) { - while (!feof($fh)) { - $line = fgets($fh,4096); - if ($line{0} != '#' && trim($line) != '') { + $specialCasingFile = PATH_t3lib . 'unidata/SpecialCasing.txt'; + if (t3lib_div::validPathStr($specialCasingFile) && @is_file($specialCasingFile)) { + $fh = fopen($specialCasingFile, 'rb'); + if ($fh) { + while (!feof($fh)) { + $line = fgets($fh, 4096); + if ($line{0} != '#' && trim($line) != '') { - list($char,$lower,$title,$upper,$cond) = t3lib_div::trimExplode(';', $line); - if ($cond == '' || $cond{0} == '#') { + list($char, $lower, $title, $upper, $cond) = t3lib_div::trimExplode(';', $line); + if ($cond == '' || $cond{0} == '#') { $utf8_char = $this->UnumberToChar(hexdec($char)); - if ($char != $lower) { + if ($char != $lower) { $arr = explode(' ', $lower); - for ($i=0; isset($arr[$i]); $i++) $arr[$i] = $this->UnumberToChar(hexdec($arr[$i])); - $utf8CaseFolding['toLower'][$utf8_char] = implode('',$arr); + for ($i = 0; isset($arr[$i]); $i++) $arr[$i] = $this->UnumberToChar(hexdec($arr[$i])); + $utf8CaseFolding['toLower'][$utf8_char] = implode('', $arr); } - if ($char != $title && $title != $upper) { + if ($char != $title && $title != $upper) { $arr = explode(' ', $title); - for ($i=0; isset($arr[$i]); $i++) $arr[$i] = $this->UnumberToChar(hexdec($arr[$i])); - 
$utf8CaseFolding['toTitle'][$utf8_char] = implode('',$arr); + for ($i = 0; isset($arr[$i]); $i++) $arr[$i] = $this->UnumberToChar(hexdec($arr[$i])); + $utf8CaseFolding['toTitle'][$utf8_char] = implode('', $arr); } - if ($char != $upper) { - $arr = explode(' ', $upper); - for ($i=0; isset($arr[$i]); $i++) $arr[$i] = $this->UnumberToChar(hexdec($arr[$i])); - $utf8CaseFolding['toUpper'][$utf8_char] = implode('',$arr); + if ($char != $upper) { + $arr = explode(' ', $upper); + for ($i = 0; isset($arr[$i]); $i++) $arr[$i] = $this->UnumberToChar(hexdec($arr[$i])); + $utf8CaseFolding['toUpper'][$utf8_char] = implode('', $arr); } } } @@ -1178,15 +1261,17 @@ class t3lib_cs { } // process custom decompositions - $customTranslitFile = PATH_t3lib.'unidata/Translit.txt'; - if (t3lib_div::validPathStr($customTranslitFile) && @is_file($customTranslitFile)) { - $fh = fopen($customTranslitFile,'rb'); - if ($fh) { - while (!feof($fh)) { - $line = fgets($fh,4096); - if ($line{0} != '#' && trim($line) != '') { - list($char,$translit) = t3lib_div::trimExplode(';', $line); - if (!$translit) $omit["U+$char"] = 1; + $customTranslitFile = PATH_t3lib . 
'unidata/Translit.txt'; + if (t3lib_div::validPathStr($customTranslitFile) && @is_file($customTranslitFile)) { + $fh = fopen($customTranslitFile, 'rb'); + if ($fh) { + while (!feof($fh)) { + $line = fgets($fh, 4096); + if ($line{0} != '#' && trim($line) != '') { + list($char, $translit) = t3lib_div::trimExplode(';', $line); + if (!$translit) { + $omit["U+$char"] = 1; + } $decomposition["U+$char"] = explode(' ', $translit); } @@ -1196,19 +1281,19 @@ class t3lib_cs { } // decompose and remove marks; inspired by unac (Loic Dachary ) - foreach($decomposition as $from => $to) { + foreach ($decomposition as $from => $to) { $code_decomp = array(); - while ($code_value = array_shift($to)) { - if (isset($decomposition["U+$code_value"])) { // do recursive decomposition - foreach(array_reverse($decomposition["U+$code_value"]) as $cv) { + while ($code_value = array_shift($to)) { + if (isset($decomposition["U+$code_value"])) { // do recursive decomposition + foreach (array_reverse($decomposition["U+$code_value"]) as $cv) { array_unshift($to, $cv); } - } elseif (!isset($mark["U+$code_value"])) { // remove mark + } elseif (!isset($mark["U+$code_value"])) { // remove mark array_push($code_decomp, $code_value); } } - if (count($code_decomp) || isset($omit[$from])) { + if (count($code_decomp) || isset($omit[$from])) { $decomposition[$from] = $code_decomp; } else { unset($decomposition[$from]); @@ -1219,32 +1304,35 @@ class t3lib_cs { $this->toASCII['utf-8'] = array(); $ascii =& $this->toASCII['utf-8']; - foreach($decomposition as $from => $to) { + foreach ($decomposition as $from => $to) { $code_decomp = array(); - while ($code_value = array_shift($to)) { + while ($code_value = array_shift($to)) { $ord = hexdec($code_value); - if ($ord > 127) - continue 2; // skip decompositions containing non-ASCII chars + if ($ord > 127) { + continue 2; + } // skip decompositions containing non-ASCII chars else - array_push($code_decomp,chr($ord)); + { + array_push($code_decomp, chr($ord)); + } } 
- $ascii[$this->UnumberToChar(hexdec($from))] = join('',$code_decomp); + $ascii[$this->UnumberToChar(hexdec($from))] = join('', $code_decomp); } // add numeric decompositions - foreach($number as $from => $to) { + foreach ($number as $from => $to) { $utf8_char = $this->UnumberToChar(hexdec($from)); - if (!isset($ascii[$utf8_char])) { + if (!isset($ascii[$utf8_char])) { $ascii[$utf8_char] = $to; } } - if ($cacheFileCase) { - t3lib_div::writeFileToTypo3tempDir($cacheFileCase,serialize($utf8CaseFolding)); + if ($cacheFileCase) { + t3lib_div::writeFileToTypo3tempDir($cacheFileCase, serialize($utf8CaseFolding)); } - if ($cacheFileASCII) { - t3lib_div::writeFileToTypo3tempDir($cacheFileASCII,serialize($ascii)); + if ($cacheFileASCII) { + t3lib_div::writeFileToTypo3tempDir($cacheFileASCII, serialize($ascii)); } return 3; @@ -1258,55 +1346,63 @@ class t3lib_cs { * @return integer Returns FALSE on error, a TRUE value on success: 1 table already loaded, 2, cached version, 3 table parsed (and cached). * @access private */ - function initCaseFolding($charset) { + function initCaseFolding($charset) { // Only process if the case table is not yet loaded: - if (is_array($this->caseFolding[$charset])) return 1; + if (is_array($this->caseFolding[$charset])) { + return 1; + } // Use cached version if possible - $cacheFile = t3lib_div::getFileAbsFileName('typo3temp/cs/cscase_'.$charset.'.tbl'); - if ($cacheFile && @is_file($cacheFile)) { + $cacheFile = t3lib_div::getFileAbsFileName('typo3temp/cs/cscase_' . $charset . 
'.tbl'); + if ($cacheFile && @is_file($cacheFile)) { $this->caseFolding[$charset] = unserialize(t3lib_div::getUrl($cacheFile)); return 2; } // init UTF-8 conversion for this charset - if (!$this->initCharset($charset)) { - return false; + if (!$this->initCharset($charset)) { + return FALSE; } // UTF-8 case folding is used as the base conversion table - if (!$this->initUnicodeData('case')) { - return false; + if (!$this->initUnicodeData('case')) { + return FALSE; } $nochar = chr($this->noCharByteVal); - foreach ($this->parsedCharsets[$charset]['local'] as $ci => $utf8) { + foreach ($this->parsedCharsets[$charset]['local'] as $ci => $utf8) { // reconvert to charset (don't use chr() of numeric value, might be muli-byte) $c = $this->utf8_decode($utf8, $charset); // $cc = $this->conv($this->caseFolding['utf-8']['toUpper'][$utf8], 'utf-8', $charset); $cc = $this->utf8_decode($this->caseFolding['utf-8']['toUpper'][$utf8], $charset); - if ($cc != '' && $cc != $nochar) $this->caseFolding[$charset]['toUpper'][$c] = $cc; + if ($cc != '' && $cc != $nochar) { + $this->caseFolding[$charset]['toUpper'][$c] = $cc; + } // $cc = $this->conv($this->caseFolding['utf-8']['toLower'][$utf8], 'utf-8', $charset); $cc = $this->utf8_decode($this->caseFolding['utf-8']['toLower'][$utf8], $charset); - if ($cc != '' && $cc != $nochar) $this->caseFolding[$charset]['toLower'][$c] = $cc; + if ($cc != '' && $cc != $nochar) { + $this->caseFolding[$charset]['toLower'][$c] = $cc; + } // $cc = $this->conv($this->caseFolding['utf-8']['toTitle'][$utf8], 'utf-8', $charset); $cc = $this->utf8_decode($this->caseFolding['utf-8']['toTitle'][$utf8], $charset); - if ($cc != '' && $cc != $nochar) $this->caseFolding[$charset]['toTitle'][$c] = $cc; + if ($cc != '' && $cc != $nochar) { + $this->caseFolding[$charset]['toTitle'][$c] = $cc; + } } // add the ASCII case table - for ($i=ord('a'); $i<=ord('z'); $i++) { - $this->caseFolding[$charset]['toUpper'][chr($i)] = chr($i-32); + for ($i = ord('a'); $i <= ord('z'); 
$i++) { + $this->caseFolding[$charset]['toUpper'][chr($i)] = chr($i - 32); } - for ($i=ord('A'); $i<=ord('Z'); $i++) { - $this->caseFolding[$charset]['toLower'][chr($i)] = chr($i+32); + for ($i = ord('A'); $i <= ord('Z'); $i++) { + $this->caseFolding[$charset]['toLower'][chr($i)] = chr($i + 32); } - if ($cacheFile) { - t3lib_div::writeFileToTypo3tempDir($cacheFile,serialize($this->caseFolding[$charset])); + if ($cacheFile) { + t3lib_div::writeFileToTypo3tempDir($cacheFile, serialize($this->caseFolding[$charset])); } return 3; @@ -1320,59 +1416,47 @@ class t3lib_cs { * @return integer Returns FALSE on error, a TRUE value on success: 1 table already loaded, 2, cached version, 3 table parsed (and cached). * @access private */ - function initToASCII($charset) { + function initToASCII($charset) { // Only process if the case table is not yet loaded: - if (is_array($this->toASCII[$charset])) return 1; + if (is_array($this->toASCII[$charset])) { + return 1; + } // Use cached version if possible - $cacheFile = t3lib_div::getFileAbsFileName('typo3temp/cs/csascii_'.$charset.'.tbl'); - if ($cacheFile && @is_file($cacheFile)) { + $cacheFile = t3lib_div::getFileAbsFileName('typo3temp/cs/csascii_' . $charset . 
'.tbl'); + if ($cacheFile && @is_file($cacheFile)) { $this->toASCII[$charset] = unserialize(t3lib_div::getUrl($cacheFile)); return 2; } // init UTF-8 conversion for this charset - if (!$this->initCharset($charset)) { - return false; + if (!$this->initCharset($charset)) { + return FALSE; } // UTF-8/ASCII transliteration is used as the base conversion table - if (!$this->initUnicodeData('ascii')) { - return false; + if (!$this->initUnicodeData('ascii')) { + return FALSE; } $nochar = chr($this->noCharByteVal); - foreach ($this->parsedCharsets[$charset]['local'] as $ci => $utf8) { + foreach ($this->parsedCharsets[$charset]['local'] as $ci => $utf8) { // reconvert to charset (don't use chr() of numeric value, might be muli-byte) $c = $this->utf8_decode($utf8, $charset); - if (isset($this->toASCII['utf-8'][$utf8])) { + if (isset($this->toASCII['utf-8'][$utf8])) { $this->toASCII[$charset][$c] = $this->toASCII['utf-8'][$utf8]; } } - if ($cacheFile) { - t3lib_div::writeFileToTypo3tempDir($cacheFile,serialize($this->toASCII[$charset])); + if ($cacheFile) { + t3lib_div::writeFileToTypo3tempDir($cacheFile, serialize($this->toASCII[$charset])); } return 3; } - - - - - - - - - - - - - - /******************************************** * * String operation functions @@ -1391,47 +1475,49 @@ class t3lib_cs { * @see substr(), mb_substr() * @author Martin Kutschker */ - function substr($charset,$string,$start,$len=null) { - if ($len===0) return ''; - - if ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'mbstring') { - // cannot omit $len, when specifying charset - if ($len==null) { - $enc = mb_internal_encoding(); // save internal encoding - mb_internal_encoding($charset); - $str = mb_substr($string,$start); - mb_internal_encoding($enc); // restore internal encoding - - return $str; - } - else { - return mb_substr($string,$start,$len,$charset); - } - } elseif ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'iconv') { - // cannot omit $len, when specifying charset - if 
($len==null) { - $enc = iconv_get_encoding('internal_encoding'); // save internal encoding - iconv_set_encoding('internal_encoding',$charset); - $str = iconv_substr($string,$start); - iconv_set_encoding('internal_encoding',$enc); // restore internal encoding - - return $str; - } - else { - return iconv_substr($string,$start,$len,$charset); - } - } elseif ($charset == 'utf-8') { - return $this->utf8_substr($string,$start,$len); - } elseif ($this->eucBasedSets[$charset]) { - return $this->euc_substr($string,$start,$charset,$len); - } elseif ($this->twoByteSets[$charset]) { - return substr($string,$start*2,$len*2); - } elseif ($this->fourByteSets[$charset]) { - return substr($string,$start*4,$len*4); + function substr($charset, $string, $start, $len = NULL) { + if ($len === 0 || $string === '') { + return ''; } - // treat everything else as single-byte encoding - return $len === NULL ? substr($string,$start) : substr($string,$start,$len); + if ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'mbstring') { + // cannot omit $len, when specifying charset + if ($len == NULL) { + $enc = mb_internal_encoding(); // save internal encoding + mb_internal_encoding($charset); + $str = mb_substr($string, $start); + mb_internal_encoding($enc); // restore internal encoding + + return $str; + } + else { + return mb_substr($string, $start, $len, $charset); + } + } elseif ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'iconv') { + // cannot omit $len, when specifying charset + if ($len == NULL) { + $enc = iconv_get_encoding('internal_encoding'); // save internal encoding + iconv_set_encoding('internal_encoding', $charset); + $str = iconv_substr($string, $start); + iconv_set_encoding('internal_encoding', $enc); // restore internal encoding + + return $str; + } + else { + return iconv_substr($string, $start, $len, $charset); + } + } elseif ($charset == 'utf-8') { + return $this->utf8_substr($string, $start, $len); + } elseif ($this->eucBasedSets[$charset]) { + return 
$this->euc_substr($string, $start, $charset, $len); + } elseif ($this->twoByteSets[$charset]) { + return substr($string, $start * 2, $len * 2); + } elseif ($this->fourByteSets[$charset]) { + return substr($string, $start * 4, $len * 4); + } + + // treat everything else as single-byte encoding + return $len === NULL ? substr($string, $start) : substr($string, $start, $len); } /** @@ -1444,21 +1530,21 @@ class t3lib_cs { * @see strlen() * @author Martin Kutschker */ - function strlen($charset,$string) { - if ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'mbstring') { - return mb_strlen($string,$charset); - } elseif ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'iconv') { - return iconv_strlen($string,$charset); - } elseif ($charset == 'utf-8') { + function strlen($charset, $string) { + if ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'mbstring') { + return mb_strlen($string, $charset); + } elseif ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'iconv') { + return iconv_strlen($string, $charset); + } elseif ($charset == 'utf-8') { return $this->utf8_strlen($string); - } elseif ($this->eucBasedSets[$charset]) { - return $this->euc_strlen($string,$charset); - } elseif ($this->twoByteSets[$charset]) { - return strlen($string)/2; - } elseif ($this->fourByteSets[$charset]) { - return strlen($string)/4; + } elseif ($this->eucBasedSets[$charset]) { + return $this->euc_strlen($string, $charset); + } elseif ($this->twoByteSets[$charset]) { + return strlen($string) / 2; + } elseif ($this->fourByteSets[$charset]) { + return strlen($string) / 4; } - // treat everything else as single-byte encoding + // treat everything else as single-byte encoding return strlen($string); } @@ -1473,7 +1559,7 @@ class t3lib_cs { * @see mb_strlen(), mb_substr() */ protected function cropMbstring($charset, $string, $len, $crop = '') { - if (intval($len) == 0 || mb_strlen($string) < $len) { + if (intval($len) === 0 || mb_strlen($string, $charset) <= 
abs($len)) { return $string; } @@ -1498,49 +1584,53 @@ class t3lib_cs { * @see substr(), mb_strimwidth() * @author Martin Kutschker */ - function crop($charset,$string,$len,$crop='') { + function crop($charset, $string, $len, $crop = '') { if ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'mbstring') { return $this->cropMbstring($charset, $string, $len, $crop); } - if (intval($len) == 0) return $string; + if (intval($len) == 0) { + return $string; + } - if ($charset == 'utf-8') { - $i = $this->utf8_char2byte_pos($string,$len); - } elseif ($this->eucBasedSets[$charset]) { - $i = $this->euc_char2byte_pos($string,$len,$charset); + if ($charset == 'utf-8') { + $i = $this->utf8_char2byte_pos($string, $len); + } elseif ($this->eucBasedSets[$charset]) { + $i = $this->euc_char2byte_pos($string, $len, $charset); } else { - if ($len > 0) { + if ($len > 0) { $i = $len; } else { - $i = strlen($string)+$len; - if ($i<=0) $i = false; + $i = strlen($string) + $len; + if ($i <= 0) { + $i = FALSE; + } } } - if ($i === false) { // $len outside actual string length + if ($i === FALSE) { // $len outside actual string length return $string; - } else { - if ($len > 0) { - if (strlen($string{$i})) { - return substr($string,0,$i).$crop; + } else { + if ($len > 0) { + if (strlen($string{$i})) { + return substr($string, 0, $i) . $crop; } } else { - if (strlen($string{$i-1})) { - return $crop.substr($string,$i); + if (strlen($string{$i - 1})) { + return $crop . substr($string, $i); } } -/* - if (abs($len)<$this->strlen($charset,$string)) { // Has to use ->strlen() - otherwise multibyte strings ending with a multibyte char will return true here (which is not a catastrophe, but...) 
- if ($len > 0) { - return substr($string,0,$i).$crop; - } else { - return $crop.substr($string,$i); - } - } -*/ + /* + if (abs($len)<$this->strlen($charset,$string)) { // Has to use ->strlen() - otherwise multibyte strings ending with a multibyte char will return true here (which is not a catastrophe, but...) + if ($len > 0) { + return substr($string,0,$i).$crop; + } else { + return $crop.substr($string,$i); + } + } + */ } return $string; } @@ -1555,30 +1645,34 @@ class t3lib_cs { * @see mb_strcut() * @author Martin Kutschker */ - function strtrunc($charset,$string,$len) { - if ($len <= 0) return ''; - - if ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'mbstring') { - return mb_strcut($string,0,$len,$charset); - } elseif ($charset == 'utf-8') { - return $this->utf8_strtrunc($string,$len); - } elseif ($this->eucBasedSets[$charset]) { - return $this->euc_strtrunc($string,$charset); - } elseif ($this->twoByteSets[$charset]) { - if ($len % 2) $len--; // don't cut at odd positions - } elseif ($this->fourByteSets[$charset]) { - $x = $len % 4; - $len -= $x; // realign to position dividable by four + function strtrunc($charset, $string, $len) { + if ($len <= 0) { + return ''; } - // treat everything else as single-byte encoding - return substr($string,0,$len); + + if ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'mbstring') { + return mb_strcut($string, 0, $len, $charset); + } elseif ($charset == 'utf-8') { + return $this->utf8_strtrunc($string, $len); + } elseif ($this->eucBasedSets[$charset]) { + return $this->euc_strtrunc($string, $len, $charset); + } elseif ($this->twoByteSets[$charset]) { + if ($len % 2) { + $len--; + } // don't cut at odd positions + } elseif ($this->fourByteSets[$charset]) { + $x = $len % 4; + $len -= $x; // realign to position dividable by four + } + // treat everything else as single-byte encoding + return substr($string, 0, $len); } /** * Translates all characters of a string into their respective case values. 
* Unlike strtolower() and strtoupper() this method is locale independent. * Note that the string length may change! - * eg. lower case German �(sharp S) becomes upper case "SS" + * eg. lower case German "ß" (sharp S) becomes upper case "SS" * Unit-tested by Kasper * Real case folding is language dependent, this method ignores this fact. * @@ -1589,40 +1683,40 @@ class t3lib_cs { * @author Martin Kutschker * @see strtolower(), strtoupper() */ - function conv_case($charset,$string,$case) { - if ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'mbstring') { - if ($case == 'toLower') { - $string = mb_strtolower($string,$charset); + function conv_case($charset, $string, $case) { + if ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'mbstring') { + if ($case == 'toLower') { + $string = mb_strtolower($string, $charset); } else { - $string = mb_strtoupper($string,$charset); + $string = mb_strtoupper($string, $charset); } - } elseif ($charset == 'utf-8') { - $string = $this->utf8_char_mapping($string,'case',$case); - } elseif (isset($this->eucBasedSets[$charset])) { - $string = $this->euc_char_mapping($string,$charset,'case',$case); + } elseif ($charset == 'utf-8') { + $string = $this->utf8_char_mapping($string, 'case', $case); + } elseif (isset($this->eucBasedSets[$charset])) { + $string = $this->euc_char_mapping($string, $charset, 'case', $case); } else { // treat everything else as single-byte encoding - $string = $this->sb_char_mapping($string,$charset,'case',$case); + $string = $this->sb_char_mapping($string, $charset, 'case', $case); } return $string; } /** - * Converts special chars (like ���, umlauts etc) to ascii equivalents (usually double-bytes, like �=> ae etc.) + * Converts special chars (like æøåÆØÅ, umlauts etc) to ascii equivalents (usually double-bytes, like æ => ae etc.) 
* * @param string Character set of string * @param string Input string to convert * @return string The converted string */ - function specCharsToASCII($charset,$string) { - if ($charset == 'utf-8') { - $string = $this->utf8_char_mapping($string,'ascii'); - } elseif (isset($this->eucBasedSets[$charset])) { - $string = $this->euc_char_mapping($string,$charset,'ascii'); + function specCharsToASCII($charset, $string) { + if ($charset == 'utf-8') { + $string = $this->utf8_char_mapping($string, 'ascii'); + } elseif (isset($this->eucBasedSets[$charset])) { + $string = $this->euc_char_mapping($string, $charset, 'ascii'); } else { // treat everything else as single-byte encoding - $string = $this->sb_char_mapping($string,$charset,'ascii'); + $string = $this->sb_char_mapping($string, $charset, 'ascii'); } return $string; @@ -1633,7 +1727,7 @@ class t3lib_cs { * converts the language codes that we get from the client (usually HTTP_ACCEPT_LANGUAGE) * into a TYPO3-readable language code * @param $languageCodesList list of language codes. 
something like 'de,en-us;q=0.9,de-de;q=0.7,es-cl;q=0.6,en;q=0.4,es;q=0.3,zh;q=0.1' - * see http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.4 + * see http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.4 * @return string a preferred language that TYPO3 supports, or "default" if none found * @author Benjamin Mack (benni.typo3.org) */ @@ -1641,35 +1735,35 @@ class t3lib_cs { $allLanguageCodes = array(); $selectedLanguage = 'default'; - // get all languages where TYPO3 code is the same as the ISO code + // get all languages where TYPO3 code is the same as the ISO code foreach ($this->charSetArray as $typo3Lang => $charSet) { $allLanguageCodes[$typo3Lang] = $typo3Lang; } - // get all languages where TYPO3 code differs from ISO code - // or needs the country part - // the iso codes will here overwrite the default typo3 language in the key + // get all languages where TYPO3 code differs from ISO code + // or needs the country part + // the iso codes will here overwrite the default typo3 language in the key foreach ($this->isoArray as $typo3Lang => $isoLang) { $isoLang = join('-', explode('_', $isoLang)); $allLanguageCodes[$typo3Lang] = $isoLang; } - // move the iso codes to the (because we're comparing the keys with "isset" later on) + // move the iso codes to the (because we're comparing the keys with "isset" later on) $allLanguageCodes = array_flip($allLanguageCodes); $preferredLanguages = t3lib_div::trimExplode(',', $languageCodesList); - // order the preferred languages after they key + // order the preferred languages after they key $sortedPreferredLanguages = array(); foreach ($preferredLanguages as $preferredLanguage) { $quality = 1.0; - if (strpos($preferredLanguage, ';q=') !== false) { + if (strpos($preferredLanguage, ';q=') !== FALSE) { list($preferredLanguage, $quality) = explode(';q=', $preferredLanguage); } $sortedPreferredLanguages[$preferredLanguage] = $quality; } - // loop through the languages, with the highest priority first + // 
loop through the languages, with the highest priority first arsort($sortedPreferredLanguages, SORT_NUMERIC); foreach ($sortedPreferredLanguages as $preferredLanguage => $quality) { if (isset($allLanguageCodes[$preferredLanguage])) { @@ -1677,7 +1771,7 @@ class t3lib_cs { break; } - // strip the country code from the end + // strip the country code from the end list($preferredLanguage, $preferredCountry) = explode('-', $preferredLanguage); if (isset($allLanguageCodes[$preferredLanguage])) { $selectedLanguage = $allLanguageCodes[$preferredLanguage]; @@ -1691,14 +1785,6 @@ class t3lib_cs { } - - - - - - - - /******************************************** * * Internal string operation functions @@ -1715,15 +1801,19 @@ class t3lib_cs { * @return string the converted string * @author Martin Kutschker */ - function sb_char_mapping($str,$charset,$mode,$opt='') { - switch($mode) { + function sb_char_mapping($str, $charset, $mode, $opt = '') { + switch ($mode) { case 'case': - if (!$this->initCaseFolding($charset)) return $str; // do nothing + if (!$this->initCaseFolding($charset)) { + return $str; + } // do nothing $map =& $this->caseFolding[$charset][$opt]; break; case 'ascii': - if (!$this->initToASCII($charset)) return $str; // do nothing + if (!$this->initToASCII($charset)) { + return $str; + } // do nothing $map =& $this->toASCII[$charset]; break; @@ -1732,9 +1822,9 @@ class t3lib_cs { } $out = ''; - for($i=0; strlen($str{$i}); $i++) { + for ($i = 0; strlen($str{$i}); $i++) { $c = $str{$i}; - if (isset($map[$c])) { + if (isset($map[$c])) { $out .= $map[$c]; } else { $out .= $c; @@ -1745,14 +1835,6 @@ class t3lib_cs { } - - - - - - - - /******************************************** * * Internal UTF-8 string operation functions @@ -1770,28 +1852,36 @@ class t3lib_cs { * @see substr() * @author Martin Kutschker */ - function utf8_substr($str,$start,$len=null) { - if (!strcmp($len,'0')) return ''; + function utf8_substr($str, $start, $len = NULL) { + if (!strcmp($len, '0')) { 
+ return ''; + } - $byte_start = $this->utf8_char2byte_pos($str,$start); - if ($byte_start === false) { - if ($start > 0) { - return false; // $start outside string length + $byte_start = $this->utf8_char2byte_pos($str, $start); + if ($byte_start === FALSE) { + if ($start > 0) { + return FALSE; // $start outside string length } else { $start = 0; } } - $str = substr($str,$byte_start); + $str = substr($str, $byte_start); - if ($len!=null) { - $byte_end = $this->utf8_char2byte_pos($str,$len); - if ($byte_end === false) // $len outside actual string length - return $len<0 ? '' : $str; // When length is less than zero and exceeds, then we return blank string. + if ($len != NULL) { + $byte_end = $this->utf8_char2byte_pos($str, $len); + if ($byte_end === FALSE) // $len outside actual string length + { + return $len < 0 ? '' : $str; + } // When length is less than zero and exceeds, then we return blank string. else - return substr($str,0,$byte_end); + { + return substr($str, 0, $byte_end); + } + } + else { + return $str; } - else return $str; } /** @@ -1803,14 +1893,18 @@ class t3lib_cs { * @see strlen() * @author Martin Kutschker */ - function utf8_strlen($str) { - $n=0; - for($i=0; strlen($str{$i}); $i++) { + function utf8_strlen($str) { + $n = 0; + for ($i = 0; strlen($str{$i}); $i++) { $c = ord($str{$i}); - if (!($c & 0x80)) // single-byte (0xxxxxx) + if (!($c & 0x80)) // single-byte (0xxxxxx) + { $n++; - elseif (($c & 0xC0) == 0xC0) // multi-byte starting byte (11xxxxxx) + } + elseif (($c & 0xC0) == 0xC0) // multi-byte starting byte (11xxxxxx) + { $n++; + } } return $n; } @@ -1824,16 +1918,20 @@ class t3lib_cs { * @see mb_strcut() * @author Martin Kutschker */ - function utf8_strtrunc($str,$len) { - $i = $len-1; + function utf8_strtrunc($str, $len) { + $i = $len - 1; if (ord($str{$i}) & 0x80) { // part of a multibyte sequence - for (; $i>0 && !(ord($str{$i}) & 0x40); $i--) ; // find the first byte - if ($i <= 0) return ''; // sanity check - for ($bc=0, 
$mbs=ord($str{$i}); $mbs & 0x80; $mbs = $mbs << 1) $bc++; // calculate number of bytes - if ($bc+$i > $len) return substr($str,0,$i); + for (; $i > 0 && !(ord($str{$i}) & 0x40); $i--) ; // find the first byte + if ($i <= 0) { + return ''; + } // sanity check + for ($bc = 0, $mbs = ord($str{$i}); $mbs & 0x80; $mbs = $mbs << 1) $bc++; // calculate number of bytes + if ($bc + $i > $len) { + return substr($str, 0, $i); + } // fallthru: multibyte char fits into length } - return substr($str,0,$len); + return substr($str, 0, $len); } /** @@ -1846,20 +1944,24 @@ class t3lib_cs { * @see strpos() * @author Martin Kutschker */ - function utf8_strpos($haystack,$needle,$offset=0) { - if ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'mbstring') { - return mb_strpos($haystack,$needle,$offset,'utf-8'); - } elseif ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'iconv') { - return iconv_strpos($haystack,$needle,$offset,'utf-8'); + function utf8_strpos($haystack, $needle, $offset = 0) { + if ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'mbstring') { + return mb_strpos($haystack, $needle, $offset, 'utf-8'); + } elseif ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'iconv') { + return iconv_strpos($haystack, $needle, $offset, 'utf-8'); } - $byte_offset = $this->utf8_char2byte_pos($haystack,$offset); - if ($byte_offset === false) return false; // offset beyond string length + $byte_offset = $this->utf8_char2byte_pos($haystack, $offset); + if ($byte_offset === FALSE) { + return FALSE; + } // offset beyond string length - $byte_pos = strpos($haystack,$needle,$byte_offset); - if ($byte_pos === false) return false; // needle not found + $byte_pos = strpos($haystack, $needle, $byte_offset); + if ($byte_pos === FALSE) { + return FALSE; + } // needle not found - return $this->utf8_byte2char_pos($haystack,$byte_pos); + return $this->utf8_byte2char_pos($haystack, $byte_pos); } /** @@ -1871,17 +1973,19 @@ class t3lib_cs { * @see strrpos() * @author 
Martin Kutschker */ - function utf8_strrpos($haystack,$needle) { - if ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'mbstring') { - return mb_strrpos($haystack,$needle,'utf-8'); - } elseif ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'iconv') { - return iconv_strrpos($haystack,$needle,'utf-8'); + function utf8_strrpos($haystack, $needle) { + if ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'mbstring') { + return mb_strrpos($haystack, $needle, 'utf-8'); + } elseif ($GLOBALS['TYPO3_CONF_VARS']['SYS']['t3lib_cs_utils'] == 'iconv') { + return iconv_strrpos($haystack, $needle, 'utf-8'); } - $byte_pos = strrpos($haystack,$needle); - if ($byte_pos === false) return false; // needle not found + $byte_pos = strrpos($haystack, $needle); + if ($byte_pos === FALSE) { + return FALSE; + } // needle not found - return $this->utf8_byte2char_pos($haystack,$byte_pos); + return $this->utf8_byte2char_pos($haystack, $byte_pos); } /** @@ -1893,30 +1997,38 @@ class t3lib_cs { * @return integer Byte position * @author Martin Kutschker */ - function utf8_char2byte_pos($str,$pos) { - $n = 0; // number of characters found - $p = abs($pos); // number of characters wanted + function utf8_char2byte_pos($str, $pos) { + $n = 0; // number of characters found + $p = abs($pos); // number of characters wanted - if ($pos >= 0) { + if ($pos >= 0) { $i = 0; $d = 1; } else { - $i = strlen($str)-1; + $i = strlen($str) - 1; $d = -1; } - for( ; strlen($str{$i}) && $n<$p; $i+=$d) { - $c = (int)ord($str{$i}); - if (!($c & 0x80)) // single-byte (0xxxxxx) + for (; strlen($str{$i}) && $n < $p; $i += $d) { + $c = (int) ord($str{$i}); + if (!($c & 0x80)) // single-byte (0xxxxxx) + { $n++; - elseif (($c & 0xC0) == 0xC0) // multi-byte starting byte (11xxxxxx) + } + elseif (($c & 0xC0) == 0xC0) // multi-byte starting byte (11xxxxxx) + { $n++; + } } - if (!strlen($str{$i})) return false; // offset beyond string length + if (!strlen($str{$i})) { + return FALSE; + } // offset beyond 
string length - if ($pos >= 0) { + if ($pos >= 0) { // skip trailing multi-byte data bytes - while ((ord($str{$i}) & 0x80) && !(ord($str{$i}) & 0x40)) { $i++; } + while ((ord($str{$i}) & 0x80) && !(ord($str{$i}) & 0x40)) { + $i++; + } } else { // correct offset $i++; @@ -1934,16 +2046,22 @@ class t3lib_cs { * @return integer character position * @author Martin Kutschker */ - function utf8_byte2char_pos($str,$pos) { - $n = 0; // number of characters - for($i=$pos; $i>0; $i--) { - $c = (int)ord($str{$i}); - if (!($c & 0x80)) // single-byte (0xxxxxx) + function utf8_byte2char_pos($str, $pos) { + $n = 0; // number of characters + for ($i = $pos; $i > 0; $i--) { + $c = (int) ord($str{$i}); + if (!($c & 0x80)) // single-byte (0xxxxxx) + { $n++; - elseif (($c & 0xC0) == 0xC0) // multi-byte starting byte (11xxxxxx) + } + elseif (($c & 0xC0) == 0xC0) // multi-byte starting byte (11xxxxxx) + { $n++; + } } - if (!strlen($str{$i})) return false; // offset beyond string length + if (!strlen($str{$i})) { + return FALSE; + } // offset beyond string length return $n; } @@ -1957,11 +2075,13 @@ class t3lib_cs { * @return string the converted string * @author Martin Kutschker */ - function utf8_char_mapping($str,$mode,$opt='') { - if (!$this->initUnicodeData($mode)) return $str; // do nothing + function utf8_char_mapping($str, $mode, $opt = '') { + if (!$this->initUnicodeData($mode)) { + return $str; + } // do nothing $out = ''; - switch($mode) { + switch ($mode) { case 'case': $map =& $this->caseFolding['utf-8'][$opt]; break; @@ -1974,17 +2094,21 @@ class t3lib_cs { return $str; } - for($i=0; strlen($str{$i}); $i++) { + for ($i = 0; strlen($str{$i}); $i++) { $c = ord($str{$i}); - if (!($c & 0x80)) // single-byte (0xxxxxx) + if (!($c & 0x80)) // single-byte (0xxxxxx) + { $mbc = $str{$i}; - elseif (($c & 0xC0) == 0xC0) { // multi-byte starting byte (11xxxxxx) - for ($bc=0; $c & 0x80; $c = $c << 1) { $bc++; } // calculate number of bytes - $mbc = substr($str,$i,$bc); - $i += $bc-1; + } 
+ elseif (($c & 0xC0) == 0xC0) { // multi-byte starting byte (11xxxxxx) + for ($bc = 0; $c & 0x80; $c = $c << 1) { + $bc++; + } // calculate number of bytes + $mbc = substr($str, $i, $bc); + $i += $bc - 1; } - if (isset($map[$mbc])) { + if (isset($map[$mbc])) { $out .= $map[$mbc]; } else { $out .= $mbc; @@ -1995,22 +2119,6 @@ class t3lib_cs { } - - - - - - - - - - - - - - - - /******************************************** * * Internal EUC string operation functions @@ -2033,23 +2141,29 @@ class t3lib_cs { * @see mb_strcut() * @author Martin Kutschker */ - function euc_strtrunc($str,$len,$charset) { + function euc_strtrunc($str, $len, $charset) { $sjis = ($charset == 'shift_jis'); - for ($i=0; strlen($str{$i}) && $i<$len; $i++) { + for ($i = 0; strlen($str{$i}) && $i < $len; $i++) { $c = ord($str{$i}); - if ($sjis) { - if (($c >= 0x80 && $c < 0xA0) || ($c >= 0xE0)) $i++; // advance a double-byte char + if ($sjis) { + if (($c >= 0x80 && $c < 0xA0) || ($c >= 0xE0)) { + $i++; + } // advance a double-byte char } - else { - if ($c >= 0x80) $i++; // advance a double-byte char + else { + if ($c >= 0x80) { + $i++; + } // advance a double-byte char } } - if (!strlen($str{$i})) return $str; // string shorter than supplied length + if (!strlen($str{$i})) { + return $str; + } // string shorter than supplied length - if ($i>$len) { - return substr($str,0,$len-1); // we ended on a first byte + if ($i > $len) { + return substr($str, 0, $len - 1); // we ended on a first byte } else { - return substr($str,0,$len); + return substr($str, 0, $len); } } @@ -2063,20 +2177,28 @@ class t3lib_cs { * @return string the substring * @author Martin Kutschker */ - function euc_substr($str,$start,$charset,$len=null) { - $byte_start = $this->euc_char2byte_pos($str,$start,$charset); - if ($byte_start === false) return false; // $start outside string length + function euc_substr($str, $start, $charset, $len = NULL) { + $byte_start = $this->euc_char2byte_pos($str, $start, $charset); + if ($byte_start 
=== FALSE) { + return FALSE; + } // $start outside string length - $str = substr($str,$byte_start); + $str = substr($str, $byte_start); - if ($len!=null) { - $byte_end = $this->euc_char2byte_pos($str,$len,$charset); - if ($byte_end === false) // $len outside actual string length + if ($len != NULL) { + $byte_end = $this->euc_char2byte_pos($str, $len, $charset); + if ($byte_end === FALSE) // $len outside actual string length + { return $str; + } else - return substr($str,0,$byte_end); + { + return substr($str, 0, $byte_end); + } + } + else { + return $str; } - else return $str; } /** @@ -2088,16 +2210,20 @@ class t3lib_cs { * @see strlen() * @author Martin Kutschker */ - function euc_strlen($str,$charset) { + function euc_strlen($str, $charset) { $sjis = ($charset == 'shift_jis'); - $n=0; - for ($i=0; strlen($str{$i}); $i++) { + $n = 0; + for ($i = 0; strlen($str{$i}); $i++) { $c = ord($str{$i}); - if ($sjis) { - if (($c >= 0x80 && $c < 0xA0) || ($c >= 0xE0)) $i++; // advance a double-byte char + if ($sjis) { + if (($c >= 0x80 && $c < 0xA0) || ($c >= 0xE0)) { + $i++; + } // advance a double-byte char } - else { - if ($c >= 0x80) $i++; // advance a double-byte char + else { + if ($c >= 0x80) { + $i++; + } // advance a double-byte char } $n++; @@ -2115,33 +2241,41 @@ class t3lib_cs { * @return integer byte position * @author Martin Kutschker */ - function euc_char2byte_pos($str,$pos,$charset) { + function euc_char2byte_pos($str, $pos, $charset) { $sjis = ($charset == 'shift_jis'); $n = 0; // number of characters seen - $p = abs($pos); // number of characters wanted + $p = abs($pos); // number of characters wanted - if ($pos >= 0) { + if ($pos >= 0) { $i = 0; $d = 1; } else { - $i = strlen($str)-1; + $i = strlen($str) - 1; $d = -1; } - for ( ; strlen($str{$i}) && $n<$p; $i+=$d) { + for (; strlen($str{$i}) && $n < $p; $i += $d) { $c = ord($str{$i}); - if ($sjis) { - if (($c >= 0x80 && $c < 0xA0) || ($c >= 0xE0)) $i+=$d; // advance a double-byte char + if ($sjis) { + if 
(($c >= 0x80 && $c < 0xA0) || ($c >= 0xE0)) { + $i += $d; + } // advance a double-byte char } - else { - if ($c >= 0x80) $i+=$d; // advance a double-byte char + else { + if ($c >= 0x80) { + $i += $d; + } // advance a double-byte char } $n++; } - if (!strlen($str{$i})) return false; // offset beyond string length + if (!strlen($str{$i})) { + return FALSE; + } // offset beyond string length - if ($pos < 0) $i++; // correct offset + if ($pos < 0) { + $i++; + } // correct offset return $i; } @@ -2156,15 +2290,19 @@ class t3lib_cs { * @return string the converted string * @author Martin Kutschker */ - function euc_char_mapping($str,$charset,$mode,$opt='') { - switch($mode) { + function euc_char_mapping($str, $charset, $mode, $opt = '') { + switch ($mode) { case 'case': - if (!$this->initCaseFolding($charset)) return $str; // do nothing + if (!$this->initCaseFolding($charset)) { + return $str; + } // do nothing $map =& $this->caseFolding[$charset][$opt]; break; case 'ascii': - if (!$this->initToASCII($charset)) return $str; // do nothing + if (!$this->initToASCII($charset)) { + return $str; + } // do nothing $map =& $this->toASCII[$charset]; break; @@ -2174,24 +2312,24 @@ class t3lib_cs { $sjis = ($charset == 'shift_jis'); $out = ''; - for($i=0; strlen($str{$i}); $i++) { + for ($i = 0; strlen($str{$i}); $i++) { $mbc = $str{$i}; $c = ord($mbc); - if ($sjis) { - if (($c >= 0x80 && $c < 0xA0) || ($c >= 0xE0)) { // a double-byte char - $mbc = substr($str,$i,2); + if ($sjis) { + if (($c >= 0x80 && $c < 0xA0) || ($c >= 0xE0)) { // a double-byte char + $mbc = substr($str, $i, 2); $i++; } } - else { - if ($c >= 0x80) { // a double-byte char - $mbc = substr($str,$i,2); + else { + if ($c >= 0x80) { // a double-byte char + $mbc = substr($str, $i, 2); $i++; } } - if (isset($map[$mbc])) { + if (isset($map[$mbc])) { $out .= $map[$mbc]; } else { $out .= $mbc; @@ -2203,8 +2341,8 @@ class t3lib_cs { } -if (defined('TYPO3_MODE') && 
$TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_cs.php']) { - include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_cs.php']); +if (defined('TYPO3_MODE') && isset($GLOBALS['TYPO3_CONF_VARS'][TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_cs.php'])) { + include_once($GLOBALS['TYPO3_CONF_VARS'][TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_cs.php']); } ?> diff --git a/lib/typo3/class.t3lib_div.php b/lib/typo3/class.t3lib_div.php index d4f0e922081..d01a7e9fa15 100644 --- a/lib/typo3/class.t3lib_div.php +++ b/lib/typo3/class.t3lib_div.php @@ -1,38 +1,38 @@ + * @author Kasper Skårhøj */ /** * [CLASS/FUNCTION INDEX of SCRIPT] @@ -41,177 +41,175 @@ * * 232: class t3lib_div * - * SECTION: GET/POST Variables - * 262: function _GP($var) - * 280: function _GET($var=NULL) - * 297: function _POST($var=NULL) - * 313: function _GETset($inputGet,$key='') - * 336: function GPvar($var,$strip=0) - * 353: function GParrayMerged($var) + * SECTION: GET/POST Variables + * 262: function _GP($var) + * 280: function _GET($var=NULL) + * 297: function _POST($var=NULL) + * 313: function _GETset($inputGet,$key='') + * 336: function GPvar($var,$strip=0) + * 353: function GParrayMerged($var) * - * SECTION: IMAGE FUNCTIONS - * 397: function gif_compress($theFile, $type) - * 425: function png_to_gif_by_imagemagick($theFile) - * 450: function read_png_gif($theFile,$output_png=0) + * SECTION: IMAGE FUNCTIONS + * 397: function gif_compress($theFile, $type) + * 425: function png_to_gif_by_imagemagick($theFile) + * 450: function read_png_gif($theFile,$output_png=0) * - * SECTION: STRING FUNCTIONS - * 499: function fixed_lgd($string,$origChars,$preStr='...') - * 524: function fixed_lgd_pre($string,$chars) - * 538: function fixed_lgd_cs($string,$chars) - * 555: function breakTextForEmail($str,$implChar="\n",$charWidth=76) - * 574: function breakLinesForEmail($str,$implChar="\n",$charWidth=76) - * 610: function cmpIP($baseIP, $list) - * 626: function cmpIPv4($baseIP, $list) - * 668: 
function cmpIPv6($baseIP, $list) - * 711: function IPv6Hex2Bin ($hex) - * 726: function normalizeIPv6($address) - * 782: function validIPv6($ip) - * 805: function cmpFQDN($baseIP, $list) - * 835: function inList($list,$item) - * 847: function rmFromList($element,$list) - * 863: function expandList($list) - * 894: function intInRange($theInt,$min,$max=2000000000,$zeroValue=0) - * 910: function intval_positive($theInt) - * 923: function int_from_ver($verNumberStr) - * 934: function compat_version($verNumberStr) - * 952: function md5int($str) - * 965: function shortMD5($input, $len=10) - * 978: function uniqueList($in_list, $secondParameter=NULL) - * 992: function split_fileref($fileref) - * 1030: function dirname($path) - * 1046: function modifyHTMLColor($color,$R,$G,$B) - * 1066: function modifyHTMLColorAll($color,$all) - * 1077: function rm_endcomma($string) - * 1090: function danish_strtoupper($string) - * 1105: function convUmlauts($str) - * 1118: function testInt($var) - * 1130: function isFirstPartOfStr($str,$partStr) - * 1146: function formatSize($sizeInBytes,$labels='') - * 1181: function convertMicrotime($microtime) - * 1195: function splitCalc($string,$operators) - * 1217: function calcPriority($string) - * 1258: function calcParenthesis($string) - * 1284: function htmlspecialchars_decode($value) - * 1299: function deHSCentities($str) - * 1312: function slashJS($string,$extended=0,$char="'") - * 1325: function rawUrlEncodeJS($str) - * 1337: function rawUrlEncodeFP($str) - * 1348: function validEmail($email) - * 1363: function formatForTextarea($content) + * SECTION: STRING FUNCTIONS + * 499: function fixed_lgd($string,$origChars,$preStr='...') + * 524: function fixed_lgd_pre($string,$chars) + * 538: function fixed_lgd_cs($string,$chars) + * 555: function breakTextForEmail($str,$implChar=LF,$charWidth=76) + * 574: function breakLinesForEmail($str,$implChar=LF,$charWidth=76) + * 610: function cmpIP($baseIP, $list) + * 626: function cmpIPv4($baseIP, $list) + * 
668: function cmpIPv6($baseIP, $list) + * 711: function IPv6Hex2Bin ($hex) + * 726: function normalizeIPv6($address) + * 782: function validIPv6($ip) + * 805: function cmpFQDN($baseIP, $list) + * 835: function inList($list,$item) + * 847: function rmFromList($element,$list) + * 863: function expandList($list) + * 894: function intInRange($theInt,$min,$max=2000000000,$zeroValue=0) + * 910: function intval_positive($theInt) + * 923: function int_from_ver($verNumberStr) + * 934: function compat_version($verNumberStr) + * 952: function md5int($str) + * 965: function shortMD5($input, $len=10) + * 978: function uniqueList($in_list, $secondParameter=NULL) + * 992: function split_fileref($fileref) + * 1030: function dirname($path) + * 1046: function modifyHTMLColor($color,$R,$G,$B) + * 1066: function modifyHTMLColorAll($color,$all) + * 1077: function rm_endcomma($string) + * 1090: function danish_strtoupper($string) + * 1105: function convUmlauts($str) + * 1118: function testInt($var) + * 1130: function isFirstPartOfStr($str,$partStr) + * 1146: function formatSize($sizeInBytes,$labels='') + * 1181: function convertMicrotime($microtime) + * 1195: function splitCalc($string,$operators) + * 1217: function calcPriority($string) + * 1258: function calcParenthesis($string) + * 1284: function htmlspecialchars_decode($value) + * 1299: function deHSCentities($str) + * 1312: function slashJS($string,$extended=0,$char="'") + * 1325: function rawUrlEncodeJS($str) + * 1337: function rawUrlEncodeFP($str) + * 1348: function validEmail($email) + * 1363: function formatForTextarea($content) * - * SECTION: ARRAY FUNCTIONS - * 1394: function inArray($in_array,$item) - * 1411: function intExplode($delim, $string) - * 1430: function revExplode($delim, $string, $count=0) - * 1450: function trimExplode($delim, $string, $onlyNonEmptyValues=0) - * 1472: function uniqueArray($valueArray) - * 1484: function removeArrayEntryByValue($array,$cmpValue) - * 1513: function 
implodeArrayForUrl($name,$theArray,$str='',$skipBlank=0,$rawurlencodeParamName=0) - * 1538: function explodeUrl2Array($string,$multidim=FALSE) - * 1564: function compileSelectedGetVarsFromArray($varList,$getArray,$GPvarAlt=1) - * 1587: function addSlashesOnArray(&$theArray) - * 1611: function stripSlashesOnArray(&$theArray) - * 1633: function slashArray($arr,$cmd) - * 1650: function array_merge_recursive_overrule($arr0,$arr1,$notAddKeys=0,$includeEmtpyValues=true) - * 1683: function array_merge($arr1,$arr2) - * 1696: function csvValues($row,$delim=',',$quote='"') + * SECTION: ARRAY FUNCTIONS + * 1394: function inArray($in_array,$item) + * 1411: function intExplode($delim, $string) + * 1430: function revExplode($delim, $string, $count=0) + * 1450: function trimExplode($delim, $string, $onlyNonEmptyValues=0) + * 1472: function uniqueArray($valueArray) + * 1484: function removeArrayEntryByValue($array,$cmpValue) + * 1513: function implodeArrayForUrl($name,$theArray,$str='',$skipBlank=0,$rawurlencodeParamName=0) + * 1538: function explodeUrl2Array($string,$multidim=FALSE) + * 1564: function compileSelectedGetVarsFromArray($varList,$getArray,$GPvarAlt=1) + * 1587: function addSlashesOnArray(&$theArray) + * 1611: function stripSlashesOnArray(&$theArray) + * 1633: function slashArray($arr,$cmd) + * 1650: function array_merge_recursive_overrule($arr0,$arr1,$notAddKeys=0,$includeEmtpyValues=true) + * 1683: function array_merge($arr1,$arr2) + * 1696: function csvValues($row,$delim=',',$quote='"') * - * SECTION: HTML/XML PROCESSING - * 1738: function get_tag_attributes($tag) - * 1775: function split_tag_attributes($tag) - * 1809: function implodeAttributes($arr,$xhtmlSafe=FALSE,$dontOmitBlankAttribs=FALSE) - * 1836: function implodeParams($arr,$xhtmlSafe=FALSE,$dontOmitBlankAttribs=FALSE) - * 1851: function wrapJS($string, $linebreak=TRUE) - * 1882: function xml2tree($string,$depth=999) - * 1969: function array2xml($array,$NSprefix='',$level=0,$docTag='phparray',$spaceInd=0, 
$options=array(),$stackData=array()) - * 2088: function xml2array($string,$NSprefix='',$reportDocTag=FALSE) - * 2198: function xmlRecompileFromStructValArray($vals) - * 2242: function xmlGetHeaderAttribs($xmlData) + * SECTION: HTML/XML PROCESSING + * 1738: function get_tag_attributes($tag) + * 1775: function split_tag_attributes($tag) + * 1809: function implodeAttributes($arr,$xhtmlSafe=FALSE,$dontOmitBlankAttribs=FALSE) + * 1836: function implodeParams($arr,$xhtmlSafe=FALSE,$dontOmitBlankAttribs=FALSE) + * 1851: function wrapJS($string, $linebreak=TRUE) + * 1882: function xml2tree($string,$depth=999) + * 1969: function array2xml($array,$NSprefix='',$level=0,$docTag='phparray',$spaceInd=0, $options=array(),$stackData=array()) + * 2088: function xml2array($string,$NSprefix='',$reportDocTag=FALSE) + * 2198: function xmlRecompileFromStructValArray($vals) + * 2242: function xmlGetHeaderAttribs($xmlData) * - * SECTION: FILES FUNCTIONS - * 2275: function getUrl($url, $includeHeader=0) - * 2342: function writeFile($file,$content) - * 2367: function fixPermissions($file) - * 2384: function writeFileToTypo3tempDir($filepath,$content) - * 2427: function mkdir($theNewFolder) - * 2446: function mkdir_deep($destination,$deepDir) - * 2468: function get_dirs($path) - * 2493: function getFilesInDir($path,$extensionList='',$prependPath=0,$order='') - * 2547: function getAllFilesAndFoldersInPath($fileArr,$path,$extList='',$regDirs=0,$recursivityLevels=99) - * 2570: function removePrefixPathFromList($fileArr,$prefixToRemove) - * 2586: function fixWindowsFilePath($theFile) - * 2598: function resolveBackPath($pathStr) - * 2626: function locationHeaderUrl($path) + * SECTION: FILES FUNCTIONS + * 2275: function getURL($url, $includeHeader=0) + * 2342: function writeFile($file,$content) + * 2367: function fixPermissions($file) + * 2384: function writeFileToTypo3tempDir($filepath,$content) + * 2427: function mkdir($theNewFolder) + * 2446: function mkdir_deep($destination,$deepDir) + * 2468: 
function get_dirs($path) + * 2493: function getFilesInDir($path,$extensionList='',$prependPath=0,$order='') + * 2547: function getAllFilesAndFoldersInPath($fileArr,$path,$extList='',$regDirs=0,$recursivityLevels=99) + * 2570: function removePrefixPathFromList($fileArr,$prefixToRemove) + * 2586: function fixWindowsFilePath($theFile) + * 2598: function resolveBackPath($pathStr) + * 2626: function locationHeaderUrl($path) * - * SECTION: DEBUG helper FUNCTIONS - * 2666: function debug_ordvalue($string,$characters=100) - * 2683: function view_array($array_in) - * 2711: function print_array($array_in) - * 2726: function debug($var="",$brOrHeader=0) - * 2757: function debug_trail() - * 2779: function debugRows($rows,$header='') + * SECTION: DEBUG helper FUNCTIONS + * 2666: function debug_ordvalue($string,$characters=100) + * 2683: function view_array($array_in) + * 2711: function print_array($array_in) + * 2726: function debug($var="",$brOrHeader=0) + * 2757: function debug_trail() + * 2779: function debugRows($rows,$header='') * - * SECTION: SYSTEM INFORMATION - * 2857: function getThisUrl() - * 2873: function linkThisScript($getParams=array()) - * 2897: function linkThisUrl($url,$getParams=array()) - * 2920: function getIndpEnv($getEnvName) - * 3113: function milliseconds() - * 3125: function clientInfo($useragent='') + * SECTION: SYSTEM INFORMATION + * 2857: function getThisUrl() + * 2873: function linkThisScript($getParams=array()) + * 2897: function linkThisUrl($url,$getParams=array()) + * 2920: function getIndpEnv($getEnvName) + * 3113: function milliseconds() + * 3125: function clientInfo($useragent='') * - * SECTION: TYPO3 SPECIFIC FUNCTIONS - * 3212: function getFileAbsFileName($filename,$onlyRelative=1,$relToTYPO3_mainDir=0) - * 3248: function validPathStr($theFile) - * 3259: function isAbsPath($path) - * 3270: function isAllowedAbsPath($path) - * 3287: function verifyFilenameAgainstDenyPattern($filename) - * 3305: function upload_copy_move($source,$destination) 
- * 3331: function upload_to_tempfile($uploadedFileName) - * 3349: function unlink_tempfile($uploadedTempFileName) - * 3365: function tempnam($filePrefix) - * 3379: function stdAuthCode($uid_or_record,$fields='',$codeLength=8) - * 3410: function cHashParams($addQueryParams) - * 3433: function hideIfNotTranslated($l18n_cfg_fieldValue) - * 3448: function readLLfile($fileRef,$langKey) - * 3472: function readLLXMLfile($fileRef,$langKey) - * 3589: function llXmlAutoFileName($fileRef,$language) - * 3633: function loadTCA($table) - * 3653: function resolveSheetDefInDS($dataStructArray,$sheet='sDEF') - * 3686: function resolveAllSheetsInDS($dataStructArray) - * 3715: function callUserFunction($funcName,&$params,&$ref,$checkPrefix='user_',$silent=0) - * 3813: function &getUserObj($classRef,$checkPrefix='user_',$silent=0) - * 3871: function &makeInstance($className) - * 3883: function makeInstanceClassName($className) - * 3897: function &makeInstanceService($serviceType, $serviceSubType='', $excludeServiceKeys=array()) - * 3961: function plainMailEncoded($email,$subject,$message,$headers='',$enc='',$charset='',$dontEncodeHeader=false) - * 4031: function quoted_printable($string,$maxlen=76) - * 4078: function encodeHeader($line,$enc='',$charset='ISO-8859-1') - * 4121: function substUrlsInPlainText($message,$urlmode='76',$index_script_url='') - * 4155: function makeRedirectUrl($inUrl,$l=0,$index_script_url='') - * 4182: function freetypeDpiComp($font_size) - * 4194: function initSysLog() - * 4251: function sysLog($msg, $extKey, $severity=0) - * 4334: function devLog($msg, $extKey, $severity=0, $dataVar=FALSE) - * 4355: function arrayToLogString($arr, $valueList=array(), $valueLength=20) - * 4378: function imageMagickCommand($command, $parameters, $path='') - * 4425: function unQuoteFilenames($parameters,$unQuote=FALSE) - * 4459: function quoteJSvalue($value, $inScriptTags = false) + * SECTION: TYPO3 SPECIFIC FUNCTIONS + * 3212: function 
getFileAbsFileName($filename,$onlyRelative=1,$relToTYPO3_mainDir=0) + * 3248: function validPathStr($theFile) + * 3259: function isAbsPath($path) + * 3270: function isAllowedAbsPath($path) + * 3287: function verifyFilenameAgainstDenyPattern($filename) + * 3305: function upload_copy_move($source,$destination) + * 3331: function upload_to_tempfile($uploadedFileName) + * 3349: function unlink_tempfile($uploadedTempFileName) + * 3365: function tempnam($filePrefix) + * 3379: function stdAuthCode($uid_or_record,$fields='',$codeLength=8) + * 3410: function cHashParams($addQueryParams) + * 3433: function hideIfNotTranslated($l18n_cfg_fieldValue) + * 3448: function readLLfile($fileRef,$langKey) + * 3472: function readLLXMLfile($fileRef,$langKey) + * 3589: function llXmlAutoFileName($fileRef,$language) + * 3633: function loadTCA($table) + * 3653: function resolveSheetDefInDS($dataStructArray,$sheet='sDEF') + * 3686: function resolveAllSheetsInDS($dataStructArray) + * 3715: function callUserFunction($funcName,&$params,&$ref,$checkPrefix='user_',$silent=0) + * 3813: function &getUserObj($classRef,$checkPrefix='user_',$silent=0) + * 3871: function &makeInstance($className) + * 3883: function makeInstanceClassName($className) + * 3897: function &makeInstanceService($serviceType, $serviceSubType='', $excludeServiceKeys=array()) + * 3961: function plainMailEncoded($email,$subject,$message,$headers='',$enc='',$charset='',$dontEncodeHeader=false) + * 4031: function quoted_printable($string,$maxlen=76) + * 4078: function encodeHeader($line,$enc='',$charset='ISO-8859-1') + * 4121: function substUrlsInPlainText($message,$urlmode='76',$index_script_url='') + * 4155: function makeRedirectUrl($inUrl,$l=0,$index_script_url='') + * 4182: function freetypeDpiComp($font_size) + * 4194: function initSysLog() + * 4251: function sysLog($msg, $extKey, $severity=0) + * 4334: function devLog($msg, $extKey, $severity=0, $dataVar=FALSE) + * 4355: function arrayToLogString($arr, $valueList=array(), 
$valueLength=20) + * 4378: function imageMagickCommand($command, $parameters, $path='') + * 4425: function unQuoteFilenames($parameters,$unQuote=FALSE) + * 4459: function quoteJSvalue($value, $inScriptTags = false) * * TOTAL FUNCTIONS: 138 * (This index is automatically created/updated by the extension "extdeveval") * */ - - - - - - - - - - + // a tabulator +define('TAB', chr(9)); + // a linefeed +define('LF', chr(10)); + // a carriage return +define('CR', chr(13)); + // a CR-LF combination +define('CRLF', CR . LF); /** * The legendary "t3lib_div" class - Miscellaneous functions for general purpose. @@ -225,7 +223,7 @@ * So: Don't instantiate - call functions with "t3lib_div::" prefixed the function name. * So use t3lib_div::[method-name] to refer to the functions, eg. 't3lib_div::milliseconds()' * - * @author Kasper Skaarhoj + * @author Kasper Skårhøj * @package TYPO3 * @subpackage t3lib */ @@ -238,6 +236,27 @@ final class t3lib_div { const SYSLOG_SEVERITY_ERROR = 3; const SYSLOG_SEVERITY_FATAL = 4; + /** + * Singleton instances returned by makeInstance, using the class names as + * array keys + * + * @var array + */ + protected static $singletonInstances = array(); + + /** + * Instances returned by makeInstance, using the class names as array keys + * + * @var array + */ + protected static $nonSingletonInstances = array(); + + /** + * Register for makeInstance with given class name and final class names to reduce number of class_exists() calls + * + * @var array Given class name => final class name + */ + protected static $finalClassNameRegister = array(); /************************* * @@ -262,11 +281,17 @@ final class t3lib_div { * @param string GET/POST var to return * @return mixed POST var named $var and if not set, the GET var of the same name. */ - public static function _GP($var) { - if(empty($var)) return; + public static function _GP($var) { + if (empty($var)) { + return; + } $value = isset($_POST[$var]) ? 
$_POST[$var] : $_GET[$var]; - if (isset($value)) { - if (is_array($value)) { t3lib_div::stripSlashesOnArray($value); } else { $value = stripslashes($value); } + if (isset($value)) { + if (is_array($value)) { + self::stripSlashesOnArray($value); + } else { + $value = stripslashes($value); + } } return $value; } @@ -278,11 +303,11 @@ final class t3lib_div { * @return array Returns the GET vars merged recursively onto the POST vars. */ public static function _GPmerged($parameter) { - $postParameter = is_array($_POST[$parameter]) ? $_POST[$parameter] : array(); - $getParameter = is_array($_GET[$parameter]) ? $_GET[$parameter] : array(); + $postParameter = (isset($_POST[$parameter]) && is_array($_POST[$parameter])) ? $_POST[$parameter] : array(); + $getParameter = (isset($_GET[$parameter]) && is_array($_GET[$parameter])) ? $_GET[$parameter] : array(); - $mergedParameters = t3lib_div::array_merge_recursive_overrule($getParameter, $postParameter); - t3lib_div::stripSlashesOnArray($mergedParameters); + $mergedParameters = self::array_merge_recursive_overrule($getParameter, $postParameter); + self::stripSlashesOnArray($mergedParameters); return $mergedParameters; } @@ -296,10 +321,14 @@ final class t3lib_div { * @return mixed If $var is set it returns the value of $_GET[$var]. If $var is NULL (default), returns $_GET itself. In any case *slashes are stipped from the output!* * @see _POST(), _GP(), _GETset() */ - public static function _GET($var=NULL) { + public static function _GET($var = NULL) { $value = ($var === NULL) ? $_GET : (empty($var) ? NULL : $_GET[$var]); - if (isset($value)) { // Removes slashes since TYPO3 has added them regardless of magic_quotes setting. - if (is_array($value)) { t3lib_div::stripSlashesOnArray($value); } else { $value = stripslashes($value); } + if (isset($value)) { // Removes slashes since TYPO3 has added them regardless of magic_quotes setting. 
+ if (is_array($value)) { + self::stripSlashesOnArray($value); + } else { + $value = stripslashes($value); + } } return $value; } @@ -313,34 +342,67 @@ final class t3lib_div { * @return mixed If $var is set it returns the value of $_POST[$var]. If $var is NULL (default), returns $_POST itself. In any case *slashes are stipped from the output!* * @see _GET(), _GP() */ - public static function _POST($var=NULL) { + public static function _POST($var = NULL) { $value = ($var === NULL) ? $_POST : (empty($var) ? NULL : $_POST[$var]); - if (isset($value)) { // Removes slashes since TYPO3 has added them regardless of magic_quotes setting. - if (is_array($value)) { t3lib_div::stripSlashesOnArray($value); } else { $value = stripslashes($value); } + if (isset($value)) { // Removes slashes since TYPO3 has added them regardless of magic_quotes setting. + if (is_array($value)) { + self::stripSlashesOnArray($value); + } else { + $value = stripslashes($value); + } } return $value; } /** - * Writes input value to $_GET + * Writes input value to $_GET. * Usage: 2 * - * @param mixed Array to write to $_GET. Values should NOT be escaped at input time (but will be escaped before writing according to TYPO3 standards). - * @param string Alternative key; If set, this will not set the WHOLE GET array, but only the key in it specified by this value! + * @param mixed $inputGet + * array or single value to write to $_GET. Values should NOT be + * escaped at input time (but will be escaped before writing + * according to TYPO3 standards). + * @param string $key + * alternative key; If set, this will not set the WHOLE GET array, + * but only the key in it specified by this value! + * You can specify to replace keys on deeper array levels by + * separating the keys with a pipe. 
+ * Example: 'parentKey|childKey' will result in + * array('parentKey' => array('childKey' => $inputGet)) + * * @return void */ - public static function _GETset($inputGet,$key='') { - // ADDS slashes since TYPO3 standard currently is that slashes MUST be applied (regardless of magic_quotes setting). - if (strcmp($key,'')) { - if (is_array($inputGet)) { - t3lib_div::addSlashesOnArray($inputGet); + public static function _GETset($inputGet, $key = '') { + // adds slashes since TYPO3 standard currently is that slashes + // must be applied (regardless of magic_quotes setting) + if (is_array($inputGet)) { + self::addSlashesOnArray($inputGet); + } else { + $inputGet = addslashes($inputGet); + } + + if ($key != '') { + if (strpos($key, '|') !== FALSE) { + $pieces = explode('|', $key); + $newGet = array(); + $pointer =& $newGet; + foreach ($pieces as $piece) { + $pointer =& $pointer[$piece]; + } + $pointer = $inputGet; + $mergedGet = self::array_merge_recursive_overrule( + $_GET, $newGet + ); + + $_GET = $mergedGet; + $GLOBALS['HTTP_GET_VARS'] = $mergedGet; } else { - $inputGet = addslashes($inputGet); + $_GET[$key] = $inputGet; + $GLOBALS['HTTP_GET_VARS'][$key] = $inputGet; } - $GLOBALS['HTTP_GET_VARS'][$key] = $_GET[$key] = $inputGet; } elseif (is_array($inputGet)) { - t3lib_div::addSlashesOnArray($inputGet); - $GLOBALS['HTTP_GET_VARS'] = $_GET = $inputGet; + $_GET = $inputGet; + $GLOBALS['HTTP_GET_VARS'] = $inputGet; } } @@ -352,16 +414,22 @@ final class t3lib_div { * @param string GET/POST var to return * @param boolean If set, values are stripped of return values that are *arrays!* - string/integer values returned are always strip-slashed() * @return mixed POST var named $var and if not set, the GET var of the same name. - * @deprecated since TYPO3 3.6 - Use t3lib_div::_GP instead (ALWAYS delivers a value with un-escaped values!) + * @deprecated since TYPO3 3.6, will be removed in TYPO3 4.6 - Use t3lib_div::_GP instead (ALWAYS delivers a value with un-escaped values!) 
* @see _GP() */ - public static function GPvar($var,$strip=0) { + public static function GPvar($var, $strip = 0) { self::logDeprecatedFunction(); - if(empty($var)) return; + if (empty($var)) { + return; + } $value = isset($_POST[$var]) ? $_POST[$var] : $_GET[$var]; - if (isset($value) && is_string($value)) { $value = stripslashes($value); } // Originally check '&& get_magic_quotes_gpc() ' but the values of $_GET are always slashed regardless of get_magic_quotes_gpc() because HTTP_POST/GET_VARS are run through addSlashesOnArray in the very beginning of index_ts.php eg. - if ($strip && isset($value) && is_array($value)) { t3lib_div::stripSlashesOnArray($value); } + if (isset($value) && is_string($value)) { + $value = stripslashes($value); + } // Originally check '&& get_magic_quotes_gpc() ' but the values of $_GET are always slashed regardless of get_magic_quotes_gpc() because HTTP_POST/GET_VARS are run through addSlashesOnArray in the very beginning of index_ts.php eg. + if ($strip && isset($value) && is_array($value)) { + self::stripSlashesOnArray($value); + } return $value; } @@ -371,10 +439,10 @@ final class t3lib_div { * * @param string Key (variable name) from GET or POST vars * @return array Returns the GET vars merged recursively onto the POST vars. - * @deprecated since TYPO3 3.7 - Use t3lib_div::_GPmerged instead + * @deprecated since TYPO3 3.7, will be removed in TYPO3 4.6 - Use t3lib_div::_GPmerged instead * @see _GP() */ - public static function GParrayMerged($var) { + public static function GParrayMerged($var) { self::logDeprecatedFunction(); return self::_GPmerged($var); @@ -389,21 +457,13 @@ final class t3lib_div { * @param string Input string * @return string Input string with potential XSS code removed */ - public static function removeXSS($string) { - require_once(PATH_typo3.'contrib/RemoveXSS/RemoveXSS.php'); + public static function removeXSS($string) { + require_once(PATH_typo3 . 
'contrib/RemoveXSS/RemoveXSS.php'); $string = RemoveXSS::process($string); return $string; } - - - - - - - - /************************* * * IMAGE FUNCTIONS @@ -415,35 +475,47 @@ final class t3lib_div { * Compressing a GIF file if not already LZW compressed. * This function is a workaround for the fact that ImageMagick and/or GD does not compress GIF-files to their minimun size (that is RLE or no compression used) * - * The function takes a file-reference, $theFile, and saves it again through GD or ImageMagick in order to compress the file - * GIF: - * If $type is not set, the compression is done with ImageMagick (provided that $GLOBALS['TYPO3_CONF_VARS']['GFX']['im_path_lzw'] is pointing to the path of a lzw-enabled version of 'convert') else with GD (should be RLE-enabled!) - * If $type is set to either 'IM' or 'GD' the compression is done with ImageMagick and GD respectively - * PNG: - * No changes. + * The function takes a file-reference, $theFile, and saves it again through GD or ImageMagick in order to compress the file + * GIF: + * If $type is not set, the compression is done with ImageMagick (provided that $GLOBALS['TYPO3_CONF_VARS']['GFX']['im_path_lzw'] is pointing to the path of a lzw-enabled version of 'convert') else with GD (should be RLE-enabled!) + * If $type is set to either 'IM' or 'GD' the compression is done with ImageMagick and GD respectively + * PNG: + * No changes. * - * $theFile is expected to be a valid GIF-file! - * The function returns a code for the operation. + * $theFile is expected to be a valid GIF-file! + * The function returns a code for the operation. * Usage: 9 * * @param string Filepath * @param string See description of function * @return string Returns "GD" if GD was used, otherwise "IM" if ImageMagick was used. If nothing done at all, it returns empty string. 
*/ - public static function gif_compress($theFile, $type) { + public static function gif_compress($theFile, $type) { $gfxConf = $GLOBALS['TYPO3_CONF_VARS']['GFX']; - $returnCode=''; - if ($gfxConf['gif_compress'] && strtolower(substr($theFile,-4,4))=='.gif') { // GIF... - if (($type=='IM' || !$type) && $gfxConf['im'] && $gfxConf['im_path_lzw']) { // IM - $cmd = t3lib_div::imageMagickCommand('convert', '"'.$theFile.'" "'.$theFile.'"', $gfxConf['im_path_lzw']); - exec($cmd); + $returnCode = ''; + if ($gfxConf['gif_compress'] && strtolower(substr($theFile, -4, 4)) == '.gif') { // GIF... + if (($type == 'IM' || !$type) && $gfxConf['im'] && $gfxConf['im_path_lzw']) { // IM + // use temporary file to prevent problems with read and write lock on same file on network file systems + $temporaryName = dirname($theFile) . '/' . md5(uniqid()) . '.gif'; + // rename could fail, if a simultaneous thread is currently working on the same thing + if (@rename($theFile, $temporaryName)) { + $cmd = self::imageMagickCommand('convert', '"' . $temporaryName . '" "' . $theFile . 
'"', $gfxConf['im_path_lzw']); + t3lib_utility_Command::exec($cmd); + unlink($temporaryName); + } - $returnCode='IM'; - } elseif (($type=='GD' || !$type) && $gfxConf['gdlib'] && !$gfxConf['gdlib_png']) { // GD + $returnCode = 'IM'; + if (@is_file($theFile)) { + self::fixPermissions($theFile); + } + } elseif (($type == 'GD' || !$type) && $gfxConf['gdlib'] && !$gfxConf['gdlib_png']) { // GD $tempImage = imageCreateFromGif($theFile); imageGif($tempImage, $theFile); imageDestroy($tempImage); - $returnCode='GD'; + $returnCode = 'GD'; + if (@is_file($theFile)) { + self::fixPermissions($theFile); + } } } return $returnCode; @@ -457,17 +529,20 @@ final class t3lib_div { * @param string $theFile the filename with path * @return string new filename */ - public static function png_to_gif_by_imagemagick($theFile) { + public static function png_to_gif_by_imagemagick($theFile) { if ($GLOBALS['TYPO3_CONF_VARS']['FE']['png_to_gif'] - && $GLOBALS['TYPO3_CONF_VARS']['GFX']['im'] - && $GLOBALS['TYPO3_CONF_VARS']['GFX']['im_path_lzw'] - && strtolower(substr($theFile,-4,4))=='.png' - && @is_file($theFile)) { // IM - $newFile = substr($theFile,0,-4).'.gif'; - $cmd = t3lib_div::imageMagickCommand('convert', '"'.$theFile.'" "'.$newFile.'"', $GLOBALS['TYPO3_CONF_VARS']['GFX']['im_path_lzw']); - exec($cmd); - $theFile = $newFile; - // unlink old file?? May be bad idea bacause TYPO3 would then recreate the file every time as TYPO3 thinks the file is not generated because it's missing!! So do not unlink $theFile here!! + && $GLOBALS['TYPO3_CONF_VARS']['GFX']['im'] + && $GLOBALS['TYPO3_CONF_VARS']['GFX']['im_path_lzw'] + && strtolower(substr($theFile, -4, 4)) == '.png' + && @is_file($theFile)) { // IM + $newFile = substr($theFile, 0, -4) . '.gif'; + $cmd = self::imageMagickCommand('convert', '"' . $theFile . '" "' . $newFile . 
'"', $GLOBALS['TYPO3_CONF_VARS']['GFX']['im_path_lzw']); + t3lib_utility_Command::exec($cmd); + $theFile = $newFile; + if (@is_file($newFile)) { + self::fixPermissions($newFile); + } + // unlink old file?? May be bad idea bacause TYPO3 would then recreate the file every time as TYPO3 thinks the file is not generated because it's missing!! So do not unlink $theFile here!! } return $theFile; } @@ -481,37 +556,27 @@ final class t3lib_div { * @param boolean If set, then input file is converted to PNG, otherwise to GIF * @return string If the new image file exists, it's filepath is returned */ - public static function read_png_gif($theFile,$output_png=0) { - if ($GLOBALS['TYPO3_CONF_VARS']['GFX']['im'] && @is_file($theFile)) { - $ext = strtolower(substr($theFile,-4,4)); + public static function read_png_gif($theFile, $output_png = 0) { + if ($GLOBALS['TYPO3_CONF_VARS']['GFX']['im'] && @is_file($theFile)) { + $ext = strtolower(substr($theFile, -4, 4)); if ( - ((string)$ext=='.png' && $output_png) || - ((string)$ext=='.gif' && !$output_png) - ) { + ((string) $ext == '.png' && $output_png) || + ((string) $ext == '.gif' && !$output_png) + ) { return $theFile; } else { - $newFile = PATH_site.'typo3temp/readPG_'.md5($theFile.'|'.filemtime($theFile)).($output_png?'.png':'.gif'); - $cmd = t3lib_div::imageMagickCommand('convert', '"'.$theFile.'" "'.$newFile.'"', $GLOBALS['TYPO3_CONF_VARS']['GFX']['im_path']); - exec($cmd); - if (@is_file($newFile)) return $newFile; + $newFile = PATH_site . 'typo3temp/readPG_' . md5($theFile . '|' . filemtime($theFile)) . ($output_png ? '.png' : '.gif'); + $cmd = self::imageMagickCommand('convert', '"' . $theFile . '" "' . $newFile . 
'"', $GLOBALS['TYPO3_CONF_VARS']['GFX']['im_path']); + t3lib_utility_Command::exec($cmd); + if (@is_file($newFile)) { + self::fixPermissions($newFile); + return $newFile; + } } } } - - - - - - - - - - - - - /************************* * * STRING FUNCTIONS @@ -528,18 +593,18 @@ final class t3lib_div { * @param integer must be an integer with an absolute value of at least 4. if negative the string is cropped from the right end. * @param string String to append to the output if it is truncated, default is '...' * @return string new string - * @deprecated since TYPO3 4.1 - Works ONLY for single-byte charsets! Use t3lib_div::fixed_lgd_cs() instead + * @deprecated since TYPO3 4.1, will be removed in TYPO3 4.6 - Works ONLY for single-byte charsets! Use t3lib_div::fixed_lgd_cs() instead * @see fixed_lgd_pre() */ - public static function fixed_lgd($string,$origChars,$preStr='...') { + public static function fixed_lgd($string, $origChars, $preStr = '...') { self::logDeprecatedFunction(); $chars = abs($origChars); - if ($chars >= 4) { - if(strlen($string)>$chars) { + if ($chars >= 4) { + if (strlen($string) > $chars) { return $origChars < 0 ? - $preStr.trim(substr($string, -($chars-3))) : - trim(substr($string, 0, $chars-3)).$preStr; + $preStr . trim(substr($string, -($chars - 3))) : + trim(substr($string, 0, $chars - 3)) . 
$preStr; } } return $string; @@ -555,13 +620,13 @@ final class t3lib_div { * @param string string to truncate * @param integer must be an integer of at least 4 * @return string new string - * @deprecated since TYPO3 4.1 - Use either fixed_lgd() or fixed_lgd_cs() (with negative input value for $chars) + * @deprecated since TYPO3 4.1, will be removed in TYPO3 4.6 - Use t3lib_div::fixed_lgd_cs() instead (with negative input value for $chars) * @see fixed_lgd() */ - public static function fixed_lgd_pre($string,$chars) { + public static function fixed_lgd_pre($string, $chars) { self::logDeprecatedFunction(); - return strrev(t3lib_div::fixed_lgd(strrev($string),$chars)); + return strrev(self::fixed_lgd(strrev($string), $chars)); } /** @@ -573,7 +638,7 @@ final class t3lib_div { * @param string appendix to the truncated string * @return string cropped string */ - public static function fixed_lgd_cs($string, $chars, $appendString='...') { + public static function fixed_lgd_cs($string, $chars, $appendString = '...') { if (is_object($GLOBALS['LANG'])) { return $GLOBALS['LANG']->csConvObj->crop($GLOBALS['LANG']->charSet, $string, $chars, $appendString); } elseif (is_object($GLOBALS['TSFE'])) { @@ -581,7 +646,7 @@ final class t3lib_div { return $GLOBALS['TSFE']->csConvObj->crop($charSet, $string, $chars, $appendString); } else { // this case should not happen - $csConvObj = t3lib_div::makeInstance('t3lib_cs'); + $csConvObj = self::makeInstance('t3lib_cs'); return $csConvObj->crop('iso-8859-1', $string, $chars, $appendString); } } @@ -593,18 +658,18 @@ final class t3lib_div { * @param string The string to break up * @param string The string to implode the broken lines with (default/typically \n) * @param integer The line length - * @deprecated since TYPO3 4.1 - Use PHP function wordwrap() + * @deprecated since TYPO3 4.1, will be removed in TYPO3 4.6 - Use PHP function wordwrap() * @return string */ - public static function breakTextForEmail($str,$implChar="\n",$charWidth=76) { 
+ public static function breakTextForEmail($str, $implChar = LF, $charWidth = 76) { self::logDeprecatedFunction(); - $lines = explode(chr(10),$str); - $outArr=array(); + $lines = explode(LF, $str); + $outArr = array(); foreach ($lines as $lStr) { - $outArr[] = t3lib_div::breakLinesForEmail($lStr,$implChar,$charWidth); + $outArr[] = self::breakLinesForEmail($lStr, $implChar, $charWidth); } - return implode(chr(10),$outArr); + return implode(LF, $outArr); } /** @@ -617,31 +682,35 @@ final class t3lib_div { * @return string * @see breakTextForEmail() */ - public static function breakLinesForEmail($str,$implChar="\n",$charWidth=76) { - $lines=array(); - $l=$charWidth; - $p=0; - while(strlen($str)>$p) { - $substr=substr($str,$p,$l); - if (strlen($substr)==$l) { - $count = count(explode(' ',trim(strrev($substr)))); - if ($count>1) { // OK... - $parts = explode(' ',strrev($substr),2); + public static function breakLinesForEmail($str, $implChar = LF, $charWidth = 76) { + $lines = array(); + $l = $charWidth; + $p = 0; + while (strlen($str) > $p) { + $substr = substr($str, $p, $l); + if (strlen($substr) == $l) { + $count = count(explode(' ', trim(strrev($substr)))); + if ($count > 1) { // OK... + $parts = explode(' ', strrev($substr), 2); $theLine = strrev($parts[1]); } else { - $afterParts = explode(' ',substr($str,$l+$p),2); - $theLine = $substr.$afterParts[0]; + $afterParts = explode(' ', substr($str, $l + $p), 2); + $theLine = $substr . $afterParts[0]; } - if (!strlen($theLine)) {break; } // Error, because this would keep us in an endless loop. + if (!strlen($theLine)) { + break; + } // Error, because this would keep us in an endless loop. } else { - $theLine=$substr; + $theLine = $substr; } - $lines[]=trim($theLine); - $p+=strlen($theLine); - if (!trim(substr($str,$p,$l))) break; // added... + $lines[] = trim($theLine); + $p += strlen($theLine); + if (!trim(substr($str, $p, $l))) { + break; + } // added... 
} - return implode($implChar,$lines); + return implode($implChar, $lines); } /** @@ -653,17 +722,17 @@ final class t3lib_div { * @param string $list is a comma-list of IP-addresses to match with. *-wildcard allowed instead of number, plus leaving out parts in the IP number is accepted as wildcard (eg. 192.168.*.* equals 192.168). If list is "*" no check is done and the function returns TRUE immediately. An empty list always returns FALSE. * @return boolean True if an IP-mask from $list matches $baseIP */ - public static function cmpIP($baseIP, $list) { + public static function cmpIP($baseIP, $list) { $list = trim($list); - if ($list === '') { - return false; - } elseif ($list === '*') { - return true; + if ($list === '') { + return FALSE; + } elseif ($list === '*') { + return TRUE; } - if (strpos($baseIP, ':') !== false && t3lib_div::validIPv6($baseIP)) { - return t3lib_div::cmpIPv6($baseIP, $list); + if (strpos($baseIP, ':') !== FALSE && self::validIPv6($baseIP)) { + return self::cmpIPv6($baseIP, $list); } else { - return t3lib_div::cmpIPv4($baseIP, $list); + return self::cmpIPv4($baseIP, $list); } } @@ -674,38 +743,40 @@ final class t3lib_div { * @param string $list is a comma-list of IP-addresses to match with. *-wildcard allowed instead of number, plus leaving out parts in the IP number is accepted as wildcard (eg. 
192.168.*.* equals 192.168) * @return boolean True if an IP-mask from $list matches $baseIP */ - public static function cmpIPv4($baseIP, $list) { - $IPpartsReq = explode('.',$baseIP); - if (count($IPpartsReq)==4) { - $values = t3lib_div::trimExplode(',',$list,1); + public static function cmpIPv4($baseIP, $list) { + $IPpartsReq = explode('.', $baseIP); + if (count($IPpartsReq) == 4) { + $values = self::trimExplode(',', $list, 1); - foreach($values as $test) { - list($test,$mask) = explode('/',$test); + foreach ($values as $test) { + list($test, $mask) = explode('/', $test); - if(intval($mask)) { + if (intval($mask)) { // "192.168.3.0/24" $lnet = ip2long($test); $lip = ip2long($baseIP); - $binnet = str_pad( decbin($lnet),32,'0','STR_PAD_LEFT'); - $firstpart = substr($binnet,0,$mask); - $binip = str_pad( decbin($lip),32,'0','STR_PAD_LEFT'); - $firstip = substr($binip,0,$mask); - $yes = (strcmp($firstpart,$firstip)==0); + $binnet = str_pad(decbin($lnet), 32, '0', STR_PAD_LEFT); + $firstpart = substr($binnet, 0, $mask); + $binip = str_pad(decbin($lip), 32, '0', STR_PAD_LEFT); + $firstip = substr($binip, 0, $mask); + $yes = (strcmp($firstpart, $firstip) == 0); } else { // "192.168.*.*" - $IPparts = explode('.',$test); + $IPparts = explode('.', $test); $yes = 1; foreach ($IPparts as $index => $val) { $val = trim($val); - if (strcmp($val,'*') && strcmp($IPpartsReq[$index],$val)) { - $yes=0; + if (strcmp($val, '*') && strcmp($IPpartsReq[$index], $val)) { + $yes = 0; } } } - if ($yes) return true; + if ($yes) { + return TRUE; + } } } - return false; + return FALSE; } /** @@ -715,55 +786,68 @@ final class t3lib_div { * @param string $list is a comma-list of IPv6 prefixes, could also contain IPv4 addresses * @return boolean True if an baseIP matches any prefix */ - public static function cmpIPv6($baseIP, $list) { - $success = false; // Policy default: Deny connection - $baseIP = t3lib_div::normalizeIPv6($baseIP); + public static function cmpIPv6($baseIP, $list) { + $success = 
FALSE; // Policy default: Deny connection + $baseIP = self::normalizeIPv6($baseIP); - $values = t3lib_div::trimExplode(',',$list,1); - foreach ($values as $test) { - list($test,$mask) = explode('/',$test); - if (t3lib_div::validIPv6($test)) { - $test = t3lib_div::normalizeIPv6($test); - if (intval($mask)) { - switch ($mask) { // test on /48 /64 - case '48': - $testBin = substr(t3lib_div::IPv6Hex2Bin($test), 0, 48); - $baseIPBin = substr(t3lib_div::IPv6Hex2Bin($baseIP), 0, 48); - $success = strcmp($testBin, $baseIPBin)==0 ? true : false; - break; - case '64': - $testBin = substr(t3lib_div::IPv6Hex2Bin($test), 0, 64); - $baseIPBin = substr(t3lib_div::IPv6Hex2Bin($baseIP), 0, 64); - $success = strcmp($testBin, $baseIPBin)==0 ? true : false; - break; - default: - $success = false; - } + $values = self::trimExplode(',', $list, 1); + foreach ($values as $test) { + $testList = explode('/', $test); + if (count($testList) == 2) { + list($test, $mask) = $testList; + } else { + $mask = FALSE; + } + + if (self::validIPv6($test)) { + $test = self::normalizeIPv6($test); + $maskInt = intval($mask) ? intval($mask) : 128; + if ($mask === '0') { // special case; /0 is an allowed mask - equals a wildcard + $success = TRUE; + } elseif ($maskInt == 128) { + $success = ($test === $baseIP); } else { - if (t3lib_div::validIPv6($test)) { // test on full ip address 128 bits - $testBin = t3lib_div::IPv6Hex2Bin($test); - $baseIPBin = t3lib_div::IPv6Hex2Bin($baseIP); - $success = strcmp($testBin, $baseIPBin)==0 ? 
true : false; + $testBin = self::IPv6Hex2Bin($test); + $baseIPBin = self::IPv6Hex2Bin($baseIP); + $success = TRUE; + + // modulo is 0 if this is an 8-bit-boundary + $maskIntModulo = $maskInt % 8; + $numFullCharactersUntilBoundary = intval($maskInt / 8); + + if (substr($testBin, 0, $numFullCharactersUntilBoundary) !== substr($baseIPBin, 0, $numFullCharactersUntilBoundary)) { + $success = FALSE; + } elseif ($maskIntModulo > 0) { + // if not an 8-bit-boundary, check bits of last character + $testLastBits = str_pad(decbin(ord(substr($testBin, $numFullCharactersUntilBoundary, 1))), 8, '0', STR_PAD_LEFT); + $baseIPLastBits = str_pad(decbin(ord(substr($baseIPBin, $numFullCharactersUntilBoundary, 1))), 8, '0', STR_PAD_LEFT); + if (strncmp($testLastBits, $baseIPLastBits, $maskIntModulo) != 0) { + $success = FALSE; + } } } } - if ($success) return true; + if ($success) { + return TRUE; + } } - return false; + return FALSE; } /** - * [Describe function...] + * Transform a regular IPv6 address from hex-representation into binary * - * @param [type] $hex: ... - * @return [type] ... + * @param string $hex IPv6 address in hex representation + * @return string Binary representation (16 characters, i.e. 128 bits) + * @see normalizeIPv6() */ - public static function IPv6Hex2Bin ($hex) { - $bin = ''; - $hex = str_replace(':', '', $hex); // Replace colon to nothing - for ($i=0; $i