mirror of
https://github.com/processwire/processwire.git
synced 2025-08-08 15:57:01 +02:00
Fix issue processwire/processwire-issues#192 where inserted emoji could cause text to be truncated on systems using dbEngine "utf8" (as opposed to "utf8mb4"). Because the emoji/MB4 detection and replacement has some overhead, it's not enabled by default. To enable, set $config->dbStripMB4=true; in your /site/config.php file.
This commit is contained in:
@@ -912,6 +912,17 @@ $config->dbSocket = '';
|
||||
*/
|
||||
$config->dbQueryLogMax = 500;
|
||||
|
||||
/**
|
||||
* Remove 4-byte characters (like emoji) when the database charset is not utf8mb4?
|
||||
*
|
||||
* When charset is not “utf8mb4” and this value is true, 4-byte UTF-8 characters are stripped
|
||||
* out of inserted values when possible. Note that this can add some overhead to INSERTs.
|
||||
*
|
||||
* @var bool
|
||||
*
|
||||
*/
|
||||
$config->dbStripMB4 = false;
|
||||
|
||||
|
||||
|
||||
/*** 8. MODULES *********************************************************************************/
|
||||
|
@@ -104,7 +104,7 @@
|
||||
* @property array $dbSqlModes Set or adjust SQL mode per MySQL version, where array keys are MySQL version and values are SQL mode command(s). #pw-group-database
|
||||
* @property int $dbQueryLogMax Maximum number of queries WireDatabasePDO will log in memory, when debug mode is enabled (default=500). #pw-group-database
|
||||
* @property string $dbInitCommand Database init command, for PDO::MYSQL_ATTR_INIT_COMMAND. Note placeholder {charset} gets replaced with $config->dbCharset. #pw-group-database
|
||||
* @property array $dbSqlModes Set, add or remove SQL mode based on MySQL version. See default in /wire/config.php for details. #pw-group-database
|
||||
* @property bool $dbStripMB4 When the database charset is not utf8mb4 and this is true, we will attempt to remove 4-byte characters (like emoji) from inserts when possible. Note that this adds some overhead. #pw-group-database
|
||||
*
|
||||
* @property array $pageList Settings specific to Page lists. #pw-group-modules
|
||||
* @property array $pageEdit Settings specific to Page editors. #pw-group-modules
|
||||
|
@@ -38,6 +38,11 @@ class PagesEditor extends Wire {
|
||||
|
||||
public function __construct(Pages $pages) {
	$this->pages = $pages;

	// 4-byte UTF-8 characters (emoji etc.) can only be stored when the database charset
	// is utf8mb4. The charset is held in $config->dbCharset; $config->dbEngine names the
	// storage engine, so comparing it against 'utf8mb4' could never match and the
	// stripping hook would be installed even on utf8mb4 installations.
	$config = $pages->wire('config');
	if($config->dbStripMB4 && strtolower((string) $config->dbCharset) !== 'utf8mb4') {
		// Strip MB4 characters from every value a Fieldtype prepares for storage
		$this->addHookAfter('Fieldtype::sleepValue', $this, 'hookFieldtypeSleepValueStripMB4');
	}
}
|
||||
|
||||
public function isCloning() {
|
||||
@@ -1384,4 +1389,16 @@ class PagesEditor extends Wire {
|
||||
|
||||
return count($sorts);
|
||||
}
|
||||
|
||||
/**
|
||||
* Hook after Fieldtype::sleepValue to remove MB4 characters when present and applicable
|
||||
*
|
||||
* This hook is only used if $config->dbStripMB4 is true and $config->dbEngine is not “utf8mb4”.
|
||||
*
|
||||
* @param HookEvent $event
|
||||
*
|
||||
*/
|
||||
protected function hookFieldtypeSleepValueStripMB4(HookEvent $event) {
	// Run the value produced by Fieldtype::sleepValue through the sanitizer's MB4
	// filter and hand the filtered value back as the hook's return value
	$sanitizer = $this->wire('sanitizer');
	$sleepValue = $event->return;
	$event->return = $sanitizer->removeMB4($sleepValue);
}
|
||||
}
|
||||
|
@@ -918,6 +918,7 @@ class Sanitizer extends Wire {
|
||||
* - `maxLength` (int): maximum characters allowed, or 0=no max (default=255).
|
||||
* - `maxBytes` (int): maximum bytes allowed (default=0, which implies maxLength*4).
|
||||
* - `stripTags` (bool): strip markup tags? (default=true).
|
||||
* - `stripMB4` (bool): strip emoji and other 4-byte UTF-8? (default=false).
|
||||
* - `allowableTags` (string): markup tags that are allowed, if stripTags is true (use same format as for PHP's `strip_tags()` function).
|
||||
* - `multiLine` (bool): allow multiple lines? if false, then $newlineReplacement below is applicable (default=false).
|
||||
* - `newlineReplacement` (string): character to replace newlines with, OR specify boolean TRUE to remove extra lines (default=" ").
|
||||
@@ -933,6 +934,7 @@ class Sanitizer extends Wire {
|
||||
'maxLength' => 255, // maximum characters allowed, or 0=no max
|
||||
'maxBytes' => 0, // maximum bytes allowed (0 = default, which is maxLength*4)
|
||||
'stripTags' => true, // strip markup tags
|
||||
'stripMB4' => false, // strip Emoji and 4-byte characters?
|
||||
'allowableTags' => '', // tags that are allowed, if stripTags is true (use same format as for PHP's strip_tags function)
|
||||
'multiLine' => false, // allow multiple lines? if false, then $newlineReplacement below is applicable
|
||||
'newlineReplacement' => ' ', // character to replace newlines with, OR specify boolean TRUE to remove extra lines
|
||||
@@ -964,6 +966,8 @@ class Sanitizer extends Wire {
|
||||
|
||||
if($options['inCharset'] != $options['outCharset']) $value = iconv($options['inCharset'], $options['outCharset'], $value);
|
||||
|
||||
if($options['stripMB4']) $value = $this->removeMB4($value);
|
||||
|
||||
if($options['maxLength']) {
|
||||
if(empty($options['maxBytes'])) $options['maxBytes'] = $options['maxLength'] * 4;
|
||||
if($this->multibyteSupport) {
|
||||
@@ -1010,6 +1014,7 @@ class Sanitizer extends Wire {
|
||||
* - `maxLength` (int): maximum characters allowed, or 0=no max (default=16384 or 16kb).
|
||||
* - `maxBytes` (int): maximum bytes allowed (default=0, which implies maxLength*3 or 48kb).
|
||||
* - `stripTags` (bool): strip markup tags? (default=true).
|
||||
* - `stripMB4` (bool): strip emoji and other 4-byte UTF-8? (default=false).
|
||||
* - `allowableTags` (string): markup tags that are allowed, if stripTags is true (use same format as for PHP's `strip_tags()` function).
|
||||
* - `allowCRLF` (bool): allow CR+LF newlines (i.e. "\r\n")? (default=false, which means "\r\n" is replaced with "\n").
|
||||
* - `inCharset` (string): input character set (default="UTF-8").
|
||||
@@ -1799,6 +1804,41 @@ class Sanitizer extends Wire {
|
||||
return str_replace(array("\r\n", "\r", "\n"), $replacement, $str);
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes 4-byte UTF-8 characters (like emoji) that produce errors with MySQL’s regular “UTF8” encoding
|
||||
*
|
||||
* Returns the same value type that it is given. If given something other than a string or array, it just
|
||||
* returns it without modification.
|
||||
*
|
||||
* @param string|array $value String or array containing strings
|
||||
* @return string|array|mixed
|
||||
*
|
||||
*/
|
||||
public function removeMB4($value) {
	// Empty values ('', '0', 0, null, empty array, ...) have nothing to strip
	if(empty($value)) return $value;

	if(is_array($value)) {
		// Process the array recursively; only string and nested-array members are
		// rewritten, every other member keeps its original value and type
		foreach($value as $key => $val) {
			if(empty($val)) continue;
			if(is_string($val) || is_array($val)) $value[$key] = $this->removeMB4($val);
		}
		return $value;
	}

	// Not a string or an array: leave as-is
	if(!is_string($value)) return $value;

	// A 4-byte UTF-8 sequence needs at least 4 bytes and always begins with a lead byte
	// in the range 0xF0-0xF4. Scanning for that byte in place avoids building a
	// one-element-per-byte array (str_split + array_map) for every string checked.
	if(strlen($value) < 4 || !preg_match('/[\xF0-\xF4]/', $value)) return $value;

	// Byte-level pattern (no "u" modifier) matching every well-formed 4-byte sequence
	$regex =
		'!(?:' .
		'\xF0[\x90-\xBF][\x80-\xBF]{2}' .   // U+10000 - U+3FFFF
		'|[\xF1-\xF3][\x80-\xBF]{3}' .      // U+40000 - U+FFFFF
		'|\xF4[\x80-\x8F][\x80-\xBF]{2}' .  // U+100000 - U+10FFFF
		')!s';

	$stripped = preg_replace($regex, '', $value);

	// preg_replace() returns null on a PCRE error; keep the original string rather
	// than silently replacing the caller's value with null
	return $stripped === null ? $value : $stripped;
}
|
||||
|
||||
/**
|
||||
* Sanitize value to string
|
||||
*
|
||||
|
@@ -63,6 +63,14 @@ class WireDatabasePDO extends Wire implements WireDatabase {
|
||||
*/
|
||||
protected $init = false;
|
||||
|
||||
/**
|
||||
* Strip 4-byte characters in “quote” and “escapeStr” methods? (only when dbEngine is not utf8mb4)
|
||||
*
|
||||
* @var bool
|
||||
*
|
||||
*/
|
||||
protected $stripMB4 = false;
|
||||
|
||||
/**
|
||||
* PDO connection settings
|
||||
*
|
||||
@@ -167,6 +175,7 @@ class WireDatabasePDO extends Wire implements WireDatabase {
|
||||
if($this->init || !$this->isWired()) return;
|
||||
$this->init = true;
|
||||
$config = $this->wire('config');
|
||||
$this->stripMB4 = $config->dbStripMB4 && strtolower($config->dbEngine) != 'utf8mb4';
|
||||
$this->queryLogMax = (int) $config->dbQueryLogMax;
|
||||
$sqlModes = $config->dbSqlModes;
|
||||
if(is_array($sqlModes)) {
|
||||
@@ -631,7 +640,7 @@ class WireDatabasePDO extends Wire implements WireDatabase {
|
||||
*
|
||||
*/
|
||||
public function escapeStr($str) {
	// Delegate to $this->quote() rather than calling PDO directly, so that any
	// preprocessing quote() applies (such as 4-byte character stripping) is applied
	// here too. A leftover direct "$this->pdo()->quote()" return used to precede this
	// statement and made it unreachable.
	// quote() wraps the escaped string in quotes; drop the first and last character.
	return substr($this->quote($str), 1, -1);
}
|
||||
|
||||
/**
|
||||
@@ -660,6 +669,9 @@ class WireDatabasePDO extends Wire implements WireDatabase {
|
||||
*
|
||||
*/
|
||||
public function quote($str) {
	// Only non-empty strings are filtered, and only when MB4 stripping is active
	$needsStrip = $this->stripMB4 && is_string($str) && !empty($str);
	if($needsStrip) $str = $this->wire('sanitizer')->removeMB4($str);

	// Let PDO perform the actual quoting/escaping
	return $this->pdo()->quote($str);
}
|
||||
|
||||
|
Reference in New Issue
Block a user