1
0
mirror of https://github.com/e107inc/e107.git synced 2025-07-31 20:00:37 +02:00

Multibyte utf8 method tests and fixes.

This commit is contained in:
Cameron
2021-01-15 09:03:07 -08:00
parent 8cfb8d80cc
commit 13517e83a8
5 changed files with 178 additions and 192 deletions

View File

@@ -133,18 +133,6 @@ if(!defined('e_ROOT'))
unset($e_ROOT);
}
// MOVED TO $e107->prepare_request()
// e107 uses relative URLs, which are broken by "pretty" URLs. So for now we don't support / after .php
//if(($pos = strpos($_SERVER['PHP_SELF'], '.php/')) !== false) // redirect bad URLs to the correct one.
//{
// $new_url = substr($_SERVER['PHP_SELF'], 0, $pos+4);
// $new_loc = ($_SERVER['QUERY_STRING']) ? $new_url.'?'.$_SERVER['QUERY_STRING'] : $new_url;
// header('Location: '.$new_loc);
// exit();
//}
// If url contains a .php in it, PHP_SELF is set wrong (imho), affecting all paths. We need to 'fix' it if it does.
//$_SERVER['PHP_SELF'] = (($pos = strpos($_SERVER['PHP_SELF'], '.php')) !== false ? substr($_SERVER['PHP_SELF'], 0, $pos+4) : $_SERVER['PHP_SELF']);
//
// D: Setup PHP error handling
// (Now we can see PHP errors) -- but note that DEBUG is not yet enabled!
@@ -158,15 +146,6 @@ $error_handler = new error_handler();
define('e107_INIT', true);
// MOVED TO $e107->prepare_request()
// setup some php options
//e107_ini_set('magic_quotes_runtime', 0);
//e107_ini_set('magic_quotes_sybase', 0);
//e107_ini_set('arg_separator.output', '&');
//e107_ini_set('session.use_only_cookies', 1);
//e107_ini_set('session.use_trans_sid', 0);
// DEPRECATED, use e107::getConfig() and e107::getPlugConfig()
if(isset($retrieve_prefs) && is_array($retrieve_prefs))
{
@@ -180,47 +159,6 @@ else
unset($retrieve_prefs);
}
// MOVED TO e107->set_constants()
//define("MAGIC_QUOTES_GPC", (ini_get('magic_quotes_gpc') ? true : false));
//
//// Define the domain name and subdomain name.
//if($_SERVER['HTTP_HOST'] && is_numeric(str_replace(".","",$_SERVER['HTTP_HOST'])))
//{
// $srvtmp = ''; // Host is an IP address.
//}
//else
//{
// $srvtmp = explode('.',str_replace('www.', '', $_SERVER['HTTP_HOST']));
//}
//
//define('e_SUBDOMAIN', (count($srvtmp)>2 && $srvtmp[2] ? $srvtmp[0] : false)); // needs to be available to e107_config.
//
//if(e_SUBDOMAIN)
//{
// unset($srvtmp[0]);
//}
//
//define('e_DOMAIN',(count($srvtmp) > 1 ? (implode('.', $srvtmp)) : false)); // if it's an IP it must be set to false.
//
//unset($srvtmp);
// MOVED TO $e107->prepare_request()
// Ensure that '.' is the first part of the include path
//$inc_path = explode(PATH_SEPARATOR, ini_get('include_path'));
//if($inc_path[0] != '.')
//{
// array_unshift($inc_path, '.');
// $inc_path = implode(PATH_SEPARATOR, $inc_path);
// e107_ini_set('include_path', $inc_path);
//}
//unset($inc_path);
//
// F: Grab e107_config, get directory paths and create $e107 object
//
@include(e_ROOT.'e107_config.php');
if(!defined('e_POWEREDBY_DISABLE'))
@@ -311,59 +249,15 @@ if(!defined('e_SECURITY_LEVEL'))
define('e_SECURITY_LEVEL', e_session::SECURITY_LEVEL_BALANCED);
}
// MOVED TO $e107->set_request()
//$inArray = array("'", ';', '/**/', '/UNION/', '/SELECT/', 'AS ');
//if (strpos($_SERVER['PHP_SELF'], 'trackback') === false)
//{
// foreach($inArray as $res)
// {
// if(stristr($_SERVER['QUERY_STRING'], $res))
// {
// die('Access denied.');
// }
// }
//}
//
// Start the parser; use it to grab the full query string
//
//DEPRECATED, BC
//$e107->url = e107::getUrl(); - caught by __get()
//TODO - find & replace $e107->url
//DEPRECATED, BC, $e107->tp caught by __get()
if(!isset($_E107['no_parser']))
{
$tp = e107::getParser(); //TODO - find & replace $tp, $e107->tp
}
//define("e_QUERY", $matches[2]);
//define("e_QUERY", $_SERVER['QUERY_STRING']);
// MOVED TO $e107->set_request()
//$e_QUERY = str_replace("&","&",$tp->post_toForm($e_QUERY));
//define('e_QUERY', $e_QUERY);
//$e_QUERY = e_QUERY;
// MOVED TO $e107->set_request()
//define('e_TBQS', $_SERVER['QUERY_STRING']);
//$_SERVER['QUERY_STRING'] = e_QUERY;
// MOVED TO $e107->set_constants()
//define('e_UC_PUBLIC', 0);
//define('e_UC_MAINADMIN', 250);
//define('e_UC_READONLY', 251);
//define('e_UC_GUEST', 252);
//define('e_UC_MEMBER', 253);
//define('e_UC_ADMIN', 254);
//define('e_UC_NOBODY', 255);
// MOVED TO $e107->set_urls() - DEPRECATED, use e107->getFolder()
//define('ADMINDIR', $ADMIN_DIRECTORY);
//
// H: Initialize debug handling
// (NO E107 DEBUG CONSTANTS OR CODE ARE AVAILABLE BEFORE THIS POINT)
@@ -371,8 +265,12 @@ if(!isset($_E107['no_parser']))
// i.e. from here on you can use E107_DEBUG_LEVEL or any
// E107_DBG_* constant for debug testing.
//
require_once(e_HANDLER.'debug_handler.php');
e107_debug::init(); // defines E107_DEBUG_LEVEL
/** @var e107_db_debug $dbg */
$dbg = e107::getDebug();
if(E107_DEBUG_LEVEL)
@@ -400,8 +298,6 @@ if(E107_DEBUG_LEVEL)
e107::getSingleton('e107_traffic'); // We start traffic counting ASAP
//$eTraffic->Calibrate($eTraffic);
// e107_require_once(e_HANDLER.'mysql_class.php');
//DEPRECATED, BC, $e107->sql caught by __get()
/** @var e_db $sql */
$sql = e107::getDb(); //TODO - find & replace $sql, $e107->sql

View File

@@ -3,17 +3,16 @@
function setimage_shortcode($parm, $mode='')
{
### Reset to defaults TODO site prefs
if(isset($parm['default']))
{
$parm['w'] = 100;
$parm['h'] = 0;
$parm['crop'] = 0;
}
e107::getParser()->thumbWidth = varset($parm['w'],0);
e107::getParser()->thumbHeight = varset($parm['h'],0);
e107::getParser()->thumbCrop = varset($parm['crop'],0);
e107::getParser()->thumbWidth(varset($parm['w'],0));
e107::getParser()->thumbHeight(varset($parm['h'],0));
e107::getParser()->thumbCrop(varset($parm['crop'],0));
}
?>

View File

@@ -30,21 +30,20 @@ class e_parse
*
* @var integer
*/
protected $utfAction;
private $multibyte = false; // previously $utfAction
// Profanity filter
public $e_pf;
private $e_pf;
// Emote filter
public $e_emote;
private $e_emote;
// 'Hooked' parsers (array)
protected $e_hook = array();
private $e_hook = array();
public $search = array(''', ''', ''', '"', 'onerror', '>', '"', ' & ');
private $search = array(''', ''', ''', '"', 'onerror', '>', '"', ' & ');
public $replace = array("'", "'", "'", '"', 'one<i></i>rror', '>', '"', ' &amp; ');
private $replace = array("'", "'", "'", '"', 'one<i></i>rror', '>', '"', ' &amp; ');
// Set to TRUE or FALSE once it has been calculated
protected $e_highlighting;
@@ -52,11 +51,11 @@ class e_parse
// Highlight query
protected $e_query;
public $thumbWidth = 100;
private $thumbWidth = 100;
public $thumbHeight = 0;
private $thumbHeight = 0;
public $thumbCrop = 0;
private $thumbCrop = 0;
private $thumbEncode = 0;
@@ -315,10 +314,6 @@ class e_parse
$this->init();
$this->compileAttributeDefaults();
$this->initCharset();
}
public function getModifierList()
@@ -335,78 +330,65 @@ class e_parse
*
* @return void
*/
private function initCharset()
public function setMultibyte($bool)
{
// Start by working out what, if anything, we do about utf-8 handling.
// 'Do nothing' is the simple option
$this->utfAction = 0;
if(PHP_MAJOR_VERSION < 6 && extension_loaded('mbstring'))
// var_dump(e_LAN);
// var_dump(e_LANGUAGE);
if($bool === false)
{
// Check for function overloading
$temp = ini_get('mbstring.func_overload');
// Just check the string functions - will be non-zero if overloaded
if(defined('MB_OVERLOAD_STRING') && ($temp & MB_OVERLOAD_STRING) == 0)
{
// Can use the mb_string routines
$this->utfAction = 1;
$this->multibyte = false;
return null;
}
// Set the default encoding, so we don't have to specify every time
if(extension_loaded('mbstring'))
{
$this->multibyte = true;
mb_internal_encoding('UTF-8');
}
}
/**
* Unicode (UTF-8) analogue of standard @link http://php.net/strlen strlen PHP function.
* Returns the length of the given string.
* Unicode (UTF-8) analogue of standard @link http://php.net/strlen strlen PHP function.
*
* @param string $str The UTF-8 encoded string being measured for length.
* @return integer The length (amount of UTF-8 characters) of the string on success, and 0 if the string is empty.
*/
public function ustrlen($str)
{
switch($this->utfAction)
if($this->multibyte)
{
case 0:
return strlen($str);
case 1:
return mb_strlen($str);
}
// Default case shouldn't happen often
// Save a call - invoke the function directly
return strlen(utf8_decode($str));
return strlen($str);
// return strlen(utf8_decode($str));
}
/**
* Unicode (UTF-8) analogue of standard @link http://php.net/strtolower strtolower PHP function.
* Make a string lowercase.
* Unicode (UTF-8) analogue of standard @link http://php.net/strtolower strtolower PHP function.
*
* @param string $str The UTF-8 encoded string to be lowercased.
* @return string Specified string with all alphabetic characters converted to lowercase.
*/
public function ustrtolower($str)
{
switch($this->utfAction)
if($this->multibyte)
{
case 1:
return mb_strtolower($str);
case 0:
default:
return strtolower($str);
}
return strtolower($str);
}
/**
* Unicode (UTF-8) analogue of standard @link http://php.net/strtoupper strtoupper PHP function.
* Make a string uppercase.
* Unicode (UTF-8) analogue of standard @link http://php.net/strtoupper strtoupper PHP function.
*
* @param string $str The UTF-8 encoded string to be uppercased.
* @return string Specified string with all alphabetic characters converted to uppercase.
@@ -414,7 +396,7 @@ class e_parse
public function ustrtoupper($str)
{
if($this->utfAction === 1)
if($this->multibyte)
{
return mb_strtoupper($str);
}
@@ -425,8 +407,9 @@ class e_parse
/**
* Unicode (UTF-8) analogue of standard @link http://php.net/strpos strpos PHP function.
* Find the position of the first occurrence of a case-sensitive UTF-8 encoded string.
* Unicode (UTF-8) analogue of standard @link http://php.net/strpos strpos PHP function.
*
* Returns the numeric position (offset in amount of UTF-8 characters)
* of the first occurrence of needle in the haystack string.
*
@@ -439,7 +422,7 @@ class e_parse
public function ustrpos($haystack, $needle, $offset = 0)
{
if($this->utfAction === 1)
if($this->multibyte)
{
return mb_strpos($haystack, $needle, $offset);
}
@@ -449,8 +432,8 @@ class e_parse
/**
* Unicode (UTF-8) analogue of standard @link http://php.net/strrpos strrpos PHP function.
* Find the position of the last occurrence of a case-sensitive UTF-8 encoded string.
* Unicode (UTF-8) analogue of standard @link http://php.net/strrpos strrpos PHP function.
* Returns the numeric position (offset in amount of UTF-8 characters)
* of the last occurrence of needle in the haystack string.
*
@@ -462,45 +445,39 @@ class e_parse
*/
public function ustrrpos($haystack, $needle, $offset = 0)
{
if($this->utfAction === 1)
if($this->multibyte)
{
return mb_strrpos($haystack, $needle, $offset);
}
return strrpos($haystack, $needle, $offset);
}
/**
* Unicode (UTF-8) analogue of standard @link http://php.net/stristr stristr PHP function.
* Returns all of haystack starting from and including the first occurrence of needle to the end.
* Unicode (UTF-8) analogue of standard @link http://php.net/stristr stristr PHP function.
*
* @param string $haystack The UTF-8 encoded string to search in.
* @param mixed $needle If needle is not a string, it is converted to an integer and applied as the ordinal value of a character.
* @param integer $before_needle [optional] (PHP 5.3+) If TRUE, returns the part of the haystack before the first occurrence of the needle (excluding needle).
* @param bool $before_needle [optional] (PHP 5.3+) If TRUE, returns the part of the haystack before the first occurrence of the needle (excluding needle).
* @return string Returns the matched substring. If needle is not found, returns FALSE.
*/
public function ustristr($haystack, $needle, $before_needle = false)
{
switch($this->utfAction)
if($this->multibyte)
{
case 0:
return stristr($haystack, $needle, $before_needle);
case 1:
//return mb_substr($haystack, $needle, $before_needle);
return mb_stristr($haystack, $needle, $before_needle);
}
// No utf8 pack backup
return stristr($haystack, $needle, $before_needle);
}
/**
* Unicode (UTF-8) analogue of standard @link http://php.net/substr substr PHP function.
* Returns the portion of string specified by the start and length parameters.
* Unicode (UTF-8) analogue of standard @link http://php.net/substr substr PHP function.
*
* NOTE: May be subtle differences in return values dependent on which routine is used.
* Native substr() routine can return FALSE. mb_substr() and utf8_substr() just return an empty string.
@@ -515,7 +492,7 @@ class e_parse
public function usubstr($str, $start, $length = null)
{
if($this->utfAction === 1)
if($this->multibyte)
{
return ($length === null) ? mb_substr($str, $start) : mb_substr($str, $start, $length);
}

View File

@@ -380,6 +380,9 @@ class language{
{
trigger_error('<b>'.__METHOD__.' is deprecated.</b> Use $tp->lanVars() instead.', E_USER_DEPRECATED); // NO LAN
$search = array();
$replace = array();
foreach($array as $k=>$v)
{
$search[] = "[".$k."]";
@@ -656,6 +659,12 @@ class language{
$this->e_language = $user_language;
$this->setDefs();
if(e_LAN !== 'en')
{
e107::getParser()->setMultibyte(true);
}
return;
}

View File

@@ -286,17 +286,54 @@ while(&#036;row = &#036;sql-&gt;fetch())
{
}
*/
/* public function testTextclean()
{
$string = "\n\n\nSomething\n\n\n";
$result = $this->tp->textclean($string);
var_export($result);
//$this->assertSame();
}*/
public function testTextclean()
public function testMultibyte()
{
}
// enable multibyte mode.
$this->tp->setMultibyte(true);
$input = "русские";
// strtoupper
$result = $this->tp->ustrtoupper($input);
$this->assertEquals('РУССКИЕ', $result);
// strlen
$result = $this->tp->ustrlen($input);
$this->assertEquals(7, $result);
// strtolower
$result = $this->tp->ustrtolower('РУССКИЕ');
$this->assertEquals($input, $result);
// strpos
$result = $this->tp->ustrpos($input, 'и');
$this->assertEquals(5, $result);
// substr
$result = $this->tp->usubstr($input, 0, 5);
$this->assertEquals('русск', $result);
// stristr
$result = $this->tp->ustristr($input, 'ские', true);
$this->assertEquals('рус', $result);
// strrpos (last occurrence of a string)
$result = $this->tp->ustrrpos($input, 'с');
$this->assertEquals(3, $result);
public function testUstrtoupper()
{
}
/*
public function testUstrlen()
{
@@ -511,12 +548,67 @@ while(&#036;row = &#036;sql-&gt;fetch())
}
/*
public function testHtml_truncate()
{
$this->tp->setMultibyte(true);
$tests = array(
0 => array(
'input' => '<p>Lorem ipsum dolor sit amet.</p>',
'expected' => '<p>Lorem ipsum dolor...</p>',
),
1 => array(
'input' => '<p>Lorem ipsum <a href="">dolor</a> sit amet.</p>',
'expected' => '<p>Lorem ipsum <a href="">dolor...</a></p>',
),
2 => array(
'input' => '<p>Lorem ipsum <img src="#" style="width:100px" /> dolor</img> sit amet.</p>',
'expected' => '<p>Lorem ipsum <img src="#" style="width:100px" /> dolo...</p>',
),
3 => array(
'input' => '<p>Это <a href="#">предложение на русском</a> языке</p>',
'expected' => '<p>Это <a href="#">предложение н...</a></p>',
),
4 => array(
'input' => '<p>Lorem ipsum &amp; dolor sit amet.</p>',
'expected' => '<p>Lorem ipsum &amp; dol...</p>',
),
5 => array(
'input' => '<p>Это <a href="#">предложение на русском</a> языке</p>',
'expected' => '<p>Это <a href="#">предложение...</a></p>',
'exact' => false,
),
/* 6 => array(
'input' => '<script>$();</script><!-- Start div --><div>Lorem</div><!-- End div --> ipsum dolor sit amet',
'expected' => '',
),
*/
);
foreach($tests as $index => $var)
{
if(empty($var['input']))
{
continue;
}
$exact = isset($var['exact']) ? $var['exact']: true;
$result = $this->tp->html_truncate($var['input'], 17, '...', $exact);
if(empty($var['expected']))
{
echo $result."\n\n";
continue;
}
$this->assertSame($var['expected'], $result, "Failed on test #".$index);
}
}
/*
public function testCheckHighlighting()
{
@@ -558,9 +650,13 @@ while(&#036;row = &#036;sql-&gt;fetch())
/*
public function testHtmlwrap()
{
$html = "<div><p>My paragraph <b>bold</b></p></div>";
$result = $this->tp->htmlwrap($html, 20);
var_dump($result);
}*/
}
*/
public function testToRss()
{
/* if(PHP_VERSION_ID < 71000 )
@@ -706,12 +802,21 @@ while(&#036;row = &#036;sql-&gt;fetch())
}
/*
public function testText_truncate()
{
$string = "This is a long string that will be truncated." ;
$expected = 'This is a long ... ';
$result = $this->tp->text_truncate($string, 20);
$this->assertSame($expected, $result);
$string = "This is has something &amp; something" ;
$expected = 'This is has something & ... ';
$result = $this->tp->text_truncate($string, 29);
$this->assertSame($expected, $result);
}
/*
public function testSetThumbSize()
{