From 4d0a53b5ee88b9a5a810eb3bcb5cee35659ee5aa Mon Sep 17 00:00:00 2001
From: rxu <rxu@mail.ru>
Date: Sun, 25 Jul 2010 16:57:00 +0800
Subject: [PATCH] [ticket/9747] Improve word censor.

Improve handling of asterisks inside censor patterns such as 'bad*word'.

PHPBB3-9747
---
 phpBB/includes/cache.php | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/phpBB/includes/cache.php b/phpBB/includes/cache.php
index 6b1e078ca4..b50fab4ca2 100644
--- a/phpBB/includes/cache.php
+++ b/phpBB/includes/cache.php
@@ -88,7 +88,14 @@ class cache extends acm
 			{
 				if ($unicode)
 				{
-					$censors['match'][] = '#(?<![\p{Nd}\p{L}_])(' . str_replace('\*', '[\p{Nd}\p{L}_]*?', preg_quote($row['word'], '#')) . ')(?![\p{Nd}\p{L}_])#iu';
+					// Unescape the asterisk to simplify further conversions
+					$row['word'] = str_replace('\*', '*', preg_quote($row['word'], '#'));
+					
+					// Replace the asterisk inside the pattern, at the start and at the end of it with regexes
+					$row['word'] = preg_replace(array('#(?<=[\p{Nd}\p{L}_])\*(?=[\p{Nd}\p{L}_])#iu', '#^\*#', '#\*$#'), array('([\x20]*?|[\p{Nd}\p{L}_-]*?)', '[\p{Nd}\p{L}_-]*?', '[\p{Nd}\p{L}_-]*?'), $row['word']);
+
+					// Generate the final substitution
+					$censors['match'][] = '#(?<![\p{Nd}\p{L}_-])(' . $row['word'] . ')(?![\p{Nd}\p{L}_-])#iu';
 				}
 				else
 				{