mirror of
https://github.com/processwire/processwire.git
synced 2025-08-09 08:17:12 +02:00
Fix issue processwire/processwire-issues#192 where inserted emoji could cause text to be truncated on systems using dbCharset "utf8" (as opposed to "utf8mb4"). Because the emoji/MB4 detection and replacement has some overhead, it's not enabled by default. To enable, set $config->dbStripMB4=true; in your /site/config.php file.
This commit is contained in:
@@ -912,6 +912,17 @@ $config->dbSocket = '';
|
|||||||
*/
|
*/
|
||||||
$config->dbQueryLogMax = 500;
|
$config->dbQueryLogMax = 500;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Remove 4-byte characters (like emoji) when dbCharset is not utf8mb4?
|
||||||
|
*
|
||||||
|
* When charset is not “utf8mb4” and this value is true, 4-byte UTF-8 characters are stripped
|
||||||
|
* out of inserted values when possible. Note that this can add some overhead to INSERTs.
|
||||||
|
*
|
||||||
|
* @var bool
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
$config->dbStripMB4 = false;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/*** 8. MODULES *********************************************************************************/
|
/*** 8. MODULES *********************************************************************************/
|
||||||
|
@@ -104,7 +104,7 @@
|
|||||||
* @property array $dbSqlModes Set or adjust SQL mode per MySQL version, where array keys are MySQL version and values are SQL mode command(s). #pw-group-database
|
* @property array $dbSqlModes Set or adjust SQL mode per MySQL version, where array keys are MySQL version and values are SQL mode command(s). #pw-group-database
|
||||||
* @property int $dbQueryLogMax Maximum number of queries WireDatabasePDO will log in memory, when debug mode is enabled (default=1000). #pw-group-database
|
* @property int $dbQueryLogMax Maximum number of queries WireDatabasePDO will log in memory, when debug mode is enabled (default=1000). #pw-group-database
|
||||||
* @property string $dbInitCommand Database init command, for PDO::MYSQL_ATTR_INIT_COMMAND. Note placeholder {charset} gets replaced with $config->dbCharset. #pw-group-database
|
* @property string $dbInitCommand Database init command, for PDO::MYSQL_ATTR_INIT_COMMAND. Note placeholder {charset} gets replaced with $config->dbCharset. #pw-group-database
|
||||||
* $property array $dbSqlModes Set, add or remove SQL mode based on MySQL version. See default in /wire/config.php for details. #pw-group-database
|
* @property bool $dbStripMB4 When dbCharset is not utf8mb4 and this is true, we will attempt to remove 4-byte characters (like emoji) from inserts when possible. Note that this adds some overhead. #pw-group-database
|
||||||
*
|
*
|
||||||
* @property array $pageList Settings specific to Page lists. #pw-group-modules
|
* @property array $pageList Settings specific to Page lists. #pw-group-modules
|
||||||
* @property array $pageEdit Settings specific to Page editors. #pw-group-modules
|
* @property array $pageEdit Settings specific to Page editors. #pw-group-modules
|
||||||
|
@@ -38,6 +38,11 @@ class PagesEditor extends Wire {
|
|||||||
|
|
||||||
/**
 * Construct the pages editor and, when applicable, attach the MB4-stripping hook
 *
 * The hook on Fieldtype::sleepValue is only registered when $config->dbStripMB4 is enabled
 * and the database charset is something other than "utf8mb4".
 *
 * @param Pages $pages
 *
 */
public function __construct(Pages $pages) {

	$this->pages = $pages;

	$config = $pages->wire('config');

	// The character set is held in $config->dbCharset. Comparing $config->dbEngine against
	// "utf8mb4" can never match, which would attach the hook (and its overhead) on every install
	// that enables dbStripMB4, including those already running utf8mb4.
	$charset = strtolower((string) $config->dbCharset);

	if($config->dbStripMB4 && $charset !== 'utf8mb4') {
		$this->addHookAfter('Fieldtype::sleepValue', $this, 'hookFieldtypeSleepValueStripMB4');
	}
}
|
||||||
|
|
||||||
public function isCloning() {
|
public function isCloning() {
|
||||||
@@ -1384,4 +1389,16 @@ class PagesEditor extends Wire {
|
|||||||
|
|
||||||
return count($sorts);
|
return count($sorts);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Hook after Fieldtype::sleepValue that removes MB4 (4-byte UTF-8) characters from the slept value
 *
 * The constructor attaches this hook only when its MB4-stripping condition on $config is met.
 * The event's return value is passed through Sanitizer::removeMB4() and written back.
 *
 * @param HookEvent $event
 *
 */
protected function hookFieldtypeSleepValueStripMB4(HookEvent $event) {
	$sanitizer = $this->wire('sanitizer');
	$sleepValue = $event->return;
	$event->return = $sanitizer->removeMB4($sleepValue);
}
|
||||||
}
|
}
|
||||||
|
@@ -918,6 +918,7 @@ class Sanitizer extends Wire {
|
|||||||
* - `maxLength` (int): maximum characters allowed, or 0=no max (default=255).
|
* - `maxLength` (int): maximum characters allowed, or 0=no max (default=255).
|
||||||
* - `maxBytes` (int): maximum bytes allowed (default=0, which implies maxLength*4).
|
* - `maxBytes` (int): maximum bytes allowed (default=0, which implies maxLength*4).
|
||||||
* - `stripTags` (bool): strip markup tags? (default=true).
|
* - `stripTags` (bool): strip markup tags? (default=true).
|
||||||
|
* - `stripMB4` (bool): strip emoji and other 4-byte UTF-8? (default=false).
|
||||||
* - `allowableTags` (string): markup tags that are allowed, if stripTags is true (use same format as for PHP's `strip_tags()` function.
|
* - `allowableTags` (string): markup tags that are allowed, if stripTags is true (use same format as for PHP's `strip_tags()` function.
|
||||||
* - `multiLine` (bool): allow multiple lines? if false, then $newlineReplacement below is applicable (default=false).
|
* - `multiLine` (bool): allow multiple lines? if false, then $newlineReplacement below is applicable (default=false).
|
||||||
* - `newlineReplacement` (string): character to replace newlines with, OR specify boolean TRUE to remove extra lines (default=" ").
|
* - `newlineReplacement` (string): character to replace newlines with, OR specify boolean TRUE to remove extra lines (default=" ").
|
||||||
@@ -933,6 +934,7 @@ class Sanitizer extends Wire {
|
|||||||
'maxLength' => 255, // maximum characters allowed, or 0=no max
|
'maxLength' => 255, // maximum characters allowed, or 0=no max
|
||||||
'maxBytes' => 0, // maximum bytes allowed (0 = default, which is maxLength*4)
|
'maxBytes' => 0, // maximum bytes allowed (0 = default, which is maxLength*4)
|
||||||
'stripTags' => true, // strip markup tags
|
'stripTags' => true, // strip markup tags
|
||||||
|
'stripMB4' => false, // strip Emoji and 4-byte characters?
|
||||||
'allowableTags' => '', // tags that are allowed, if stripTags is true (use same format as for PHP's strip_tags function)
|
'allowableTags' => '', // tags that are allowed, if stripTags is true (use same format as for PHP's strip_tags function)
|
||||||
'multiLine' => false, // allow multiple lines? if false, then $newlineReplacement below is applicable
|
'multiLine' => false, // allow multiple lines? if false, then $newlineReplacement below is applicable
|
||||||
'newlineReplacement' => ' ', // character to replace newlines with, OR specify boolean TRUE to remove extra lines
|
'newlineReplacement' => ' ', // character to replace newlines with, OR specify boolean TRUE to remove extra lines
|
||||||
@@ -964,6 +966,8 @@ class Sanitizer extends Wire {
|
|||||||
|
|
||||||
if($options['inCharset'] != $options['outCharset']) $value = iconv($options['inCharset'], $options['outCharset'], $value);
|
if($options['inCharset'] != $options['outCharset']) $value = iconv($options['inCharset'], $options['outCharset'], $value);
|
||||||
|
|
||||||
|
if($options['stripMB4']) $value = $this->removeMB4($value);
|
||||||
|
|
||||||
if($options['maxLength']) {
|
if($options['maxLength']) {
|
||||||
if(empty($options['maxBytes'])) $options['maxBytes'] = $options['maxLength'] * 4;
|
if(empty($options['maxBytes'])) $options['maxBytes'] = $options['maxLength'] * 4;
|
||||||
if($this->multibyteSupport) {
|
if($this->multibyteSupport) {
|
||||||
@@ -1010,6 +1014,7 @@ class Sanitizer extends Wire {
|
|||||||
* - `maxLength` (int): maximum characters allowed, or 0=no max (default=16384 or 16kb).
|
* - `maxLength` (int): maximum characters allowed, or 0=no max (default=16384 or 16kb).
|
||||||
* - `maxBytes` (int): maximum bytes allowed (default=0, which implies maxLength*3 or 48kb).
|
* - `maxBytes` (int): maximum bytes allowed (default=0, which implies maxLength*3 or 48kb).
|
||||||
* - `stripTags` (bool): strip markup tags? (default=true).
|
* - `stripTags` (bool): strip markup tags? (default=true).
|
||||||
|
* - `stripMB4` (bool): strip emoji and other 4-byte UTF-8? (default=false).
|
||||||
* - `allowableTags` (string): markup tags that are allowed, if stripTags is true (use same format as for PHP's `strip_tags()` function.
|
* - `allowableTags` (string): markup tags that are allowed, if stripTags is true (use same format as for PHP's `strip_tags()` function.
|
||||||
* - `allowCRLF` (bool): allow CR+LF newlines (i.e. "\r\n")? (default=false, which means "\r\n" is replaced with "\n").
|
* - `allowCRLF` (bool): allow CR+LF newlines (i.e. "\r\n")? (default=false, which means "\r\n" is replaced with "\n").
|
||||||
* - `inCharset` (string): input character set (default="UTF-8").
|
* - `inCharset` (string): input character set (default="UTF-8").
|
||||||
@@ -1799,6 +1804,41 @@ class Sanitizer extends Wire {
|
|||||||
return str_replace(array("\r\n", "\r", "\n"), $replacement, $str);
|
return str_replace(array("\r\n", "\r", "\n"), $replacement, $str);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Removes 4-byte UTF-8 characters (like emoji) that produce errors with MySQL regular “UTF8” encoding
 *
 * Returns the same value type that it is given. Arrays are processed recursively, and only their
 * non-empty string (or nested array) members are converted. If given something other than a
 * string or array, it is returned without modification.
 *
 * @param string|array|mixed $value String or array containing strings
 * @return string|array|mixed
 *
 */
function removeMB4($value) {

	if(empty($value)) return $value;

	if(is_array($value)) {
		// process array recursively, looking for strings to convert
		foreach($value as $key => $val) {
			if(empty($val)) continue;
			if(is_string($val) || is_array($val)) $value[$key] = $this->removeMB4($val);
		}
		return $value;
	}

	// not a string or an array, leave as-is
	if(!is_string($value)) return $value;

	// A 4-byte UTF-8 sequence needs at least 4 bytes and always begins with a lead byte in the
	// range 0xF0-0xF4, so a plain byte scan rules out most strings without building a per-byte
	// array or starting the regex engine.
	if(strlen($value) < 4 || strpbrk($value, "\xF0\xF1\xF2\xF3\xF4") === false) return $value;

	$regex =
		'!(?:' .
		'\xF0[\x90-\xBF][\x80-\xBF]{2}' .   // U+10000 through U+3FFFF
		'|[\xF1-\xF3][\x80-\xBF]{3}' .      // U+40000 through U+FFFFF
		'|\xF4[\x80-\x8F][\x80-\xBF]{2}' .  // U+100000 through U+10FFFF
		')!s';

	$stripped = preg_replace($regex, '', $value);

	// preg_replace() returns null on a PCRE error; keep the original text rather than wiping it
	return $stripped === null ? $value : $stripped;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sanitize value to string
|
* Sanitize value to string
|
||||||
*
|
*
|
||||||
|
@@ -63,6 +63,14 @@ class WireDatabasePDO extends Wire implements WireDatabase {
|
|||||||
*/
|
*/
|
||||||
protected $init = false;
|
protected $init = false;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Strip 4-byte characters in “quote” and “escapeStr” methods? (only when dbCharset is not utf8mb4)
|
||||||
|
*
|
||||||
|
* @var bool
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
protected $stripMB4 = false;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* PDO connection settings
|
* PDO connection settings
|
||||||
*
|
*
|
||||||
@@ -167,6 +175,7 @@ class WireDatabasePDO extends Wire implements WireDatabase {
|
|||||||
if($this->init || !$this->isWired()) return;
|
if($this->init || !$this->isWired()) return;
|
||||||
$this->init = true;
|
$this->init = true;
|
||||||
$config = $this->wire('config');
|
$config = $this->wire('config');
|
||||||
|
// The character set is held in $config->dbCharset; $config->dbEngine never equals "utf8mb4",
// so testing it would leave stripping switched on even when the database already handles 4-byte characters.
$this->stripMB4 = $config->dbStripMB4 && strtolower((string) $config->dbCharset) !== 'utf8mb4';
|
||||||
$this->queryLogMax = (int) $config->dbQueryLogMax;
|
$this->queryLogMax = (int) $config->dbQueryLogMax;
|
||||||
$sqlModes = $config->dbSqlModes;
|
$sqlModes = $config->dbSqlModes;
|
||||||
if(is_array($sqlModes)) {
|
if(is_array($sqlModes)) {
|
||||||
@@ -631,7 +640,7 @@ class WireDatabasePDO extends Wire implements WireDatabase {
|
|||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public function escapeStr($str) {
	// Go through our own quote() so that any MB4 stripping it performs applies here as well,
	// then drop the first and last character, i.e. the surrounding quotes that quoting adds.
	$quoted = $this->quote($str);
	return substr($quoted, 1, -1);
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -660,6 +669,9 @@ class WireDatabasePDO extends Wire implements WireDatabase {
|
|||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public function quote($str) {
	// Only non-empty strings are candidates for 4-byte character removal, and only when
	// stripping was switched on during initialization.
	$strip = $this->stripMB4 && is_string($str) && !empty($str);
	if($strip) $str = $this->wire('sanitizer')->removeMB4($str);

	// Delegate the actual quoting to the underlying PDO connection
	return $this->pdo()->quote($str);
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user