From 4d0a53b5ee88b9a5a810eb3bcb5cee35659ee5aa Mon Sep 17 00:00:00 2001 From: rxu <rxu@mail.ru> Date: Sun, 25 Jul 2010 16:57:00 +0800 Subject: [PATCH] [ticket/9747] Improve word censor. Better handling of the asterisk inside censor patterns such as 'bad*word'. PHPBB3-9747 --- phpBB/includes/cache.php | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/phpBB/includes/cache.php b/phpBB/includes/cache.php index 6b1e078ca4..b50fab4ca2 100644 --- a/phpBB/includes/cache.php +++ b/phpBB/includes/cache.php @@ -88,7 +88,14 @@ class cache extends acm { if ($unicode) { - $censors['match'][] = '#(?<![\p{Nd}\p{L}_])(' . str_replace('\*', '[\p{Nd}\p{L}_]*?', preg_quote($row['word'], '#')) . ')(?![\p{Nd}\p{L}_])#iu'; + // Unescape the asterisk to simplify further conversions + $row['word'] = str_replace('\*', '*', preg_quote($row['word'], '#')); + + // Replace the asterisk inside the pattern, at the start and at the end of it with regexes + $row['word'] = preg_replace(array('#(?<=[\p{Nd}\p{L}_])\*(?=[\p{Nd}\p{L}_])#iu', '#^\*#', '#\*$#'), array('([\x20]*?|[\p{Nd}\p{L}_-]*?)', '[\p{Nd}\p{L}_-]*?', '[\p{Nd}\p{L}_-]*?'), $row['word']); + + // Generate the final substitution + $censors['match'][] = '#(?<![\p{Nd}\p{L}_-])(' . $row['word'] . ')(?![\p{Nd}\p{L}_-])#iu'; } else {