mirror of
				https://github.com/phpbb/phpbb.git
				synced 2025-10-26 05:06:12 +01:00 
			
		
		
		
	Merge branch 'ticket/rxu/9933' into develop-olympus
* ticket/rxu/9933: [ticket/9933] Create unit test for word censor regular expression. [ticket/9933] Move word censor regex into separate function in functions.php [ticket/9933] Wrong handling consecutive multiple asterisks in word censor
This commit is contained in:
		| @@ -95,6 +95,9 @@ class acp_words | ||||
| 					trigger_error($user->lang['ENTER_WORD'] . adm_back_link($this->u_action), E_USER_WARNING); | ||||
| 				} | ||||
|  | ||||
| 				// Replace multiple consecutive asterisks with single one as those are not needed | ||||
| 				$word = preg_replace('#\*{2,}#', '*', $word); | ||||
|  | ||||
| 				$sql_ary = array( | ||||
| 					'word'			=> $word, | ||||
| 					'replacement'	=> $replacement | ||||
|   | ||||
| @@ -82,26 +82,9 @@ class cache extends acm | ||||
| 			$result = $db->sql_query($sql); | ||||
|  | ||||
| 			$censors = array(); | ||||
| 			$unicode = ((version_compare(PHP_VERSION, '5.1.0', '>=') || (version_compare(PHP_VERSION, '5.0.0-dev', '<=') && version_compare(PHP_VERSION, '4.4.0', '>='))) && @preg_match('/\p{L}/u', 'a') !== false) ? true : false; | ||||
|  | ||||
| 			while ($row = $db->sql_fetchrow($result)) | ||||
| 			{ | ||||
| 				if ($unicode) | ||||
| 				{ | ||||
| 					// Unescape the asterisk to simplify further conversions | ||||
| 					$row['word'] = str_replace('\*', '*', preg_quote($row['word'], '#')); | ||||
| 					 | ||||
| 					// Replace the asterisk inside the pattern, at the start and at the end of it with regexes | ||||
| 					$row['word'] = preg_replace(array('#(?<=[\p{Nd}\p{L}_])\*(?=[\p{Nd}\p{L}_])#iu', '#^\*#', '#\*$#'), array('([\x20]*?|[\p{Nd}\p{L}_-]*?)', '[\p{Nd}\p{L}_-]*?', '[\p{Nd}\p{L}_-]*?'), $row['word']); | ||||
|  | ||||
| 					// Generate the final substitution | ||||
| 					$censors['match'][] = '#(?<![\p{Nd}\p{L}_-])(' . $row['word'] . ')(?![\p{Nd}\p{L}_-])#iu'; | ||||
| 				} | ||||
| 				else | ||||
| 				{ | ||||
| 					$censors['match'][] = '#(?<!\S)(' . str_replace('\*', '\S*?', preg_quote($row['word'], '#')) . ')(?!\S)#iu'; | ||||
| 				} | ||||
|  | ||||
| 				$censors['match'][] = get_censor_preg_expression($row['word']); | ||||
| 				$censors['replace'][] = $row['replacement']; | ||||
| 			} | ||||
| 			$db->sql_freeresult($result); | ||||
|   | ||||
| @@ -3428,6 +3428,48 @@ function get_preg_expression($mode) | ||||
| 	return ''; | ||||
| } | ||||
|  | ||||
| /** | ||||
| * Generate regexp for naughty words censoring | ||||
| * Depends on whether installed PHP version supports unicode properties | ||||
| * | ||||
| * The word is escaped with preg_quote() and every asterisk in it is treated | ||||
| * as a wildcard. The resulting expression is delimited by '#' and carries | ||||
| * the 'i' and 'u' modifiers. | ||||
| * | ||||
| * With unicode property support, a run of one or more asterisks standing | ||||
| * between two letters, digits or underscores matches either spaces or | ||||
| * letters, digits, underscores and hyphens; a run at the very start or end | ||||
| * of the word matches letters, digits, underscores and hyphens only. The | ||||
| * whole match must not be directly preceded or followed by a letter, digit, | ||||
| * underscore or hyphen. | ||||
| * | ||||
| * Without unicode property support, each asterisk matches any run of | ||||
| * non-whitespace characters and the whole match must not be directly | ||||
| * preceded or followed by a non-whitespace character. | ||||
| * | ||||
| * Unicode property support is assumed when PHP is at least 5.1.0, or lies | ||||
| * between 4.4.0 and 5.0.0-dev, and preg_match() accepts a \p{L} pattern | ||||
| * with the 'u' modifier. The result of that check is kept in a static | ||||
| * variable, so it is only evaluated on the first call that reaches it. | ||||
| * | ||||
| * NOTE(review): the guard uses empty(), so the word '0' is also treated as | ||||
| * empty and yields an empty string instead of a regex. | ||||
| * | ||||
| * @param string	$word	word template to be replaced | ||||
| * | ||||
| * @return string $preg_expr		regex to use with word censor, or an empty string if $word is empty | ||||
| */ | ||||
| function get_censor_preg_expression($word) | ||||
| { | ||||
| 	static $unicode = null; | ||||
|  | ||||
| 	if (empty($word)) | ||||
| 	{ | ||||
| 		return ''; | ||||
| 	} | ||||
|  | ||||
| 	// Check whether PHP version supports unicode properties (done once, result kept in the static variable) | ||||
| 	if (is_null($unicode)) | ||||
| 	{ | ||||
| 		$unicode = ((version_compare(PHP_VERSION, '5.1.0', '>=') || (version_compare(PHP_VERSION, '5.0.0-dev', '<=') && version_compare(PHP_VERSION, '4.4.0', '>='))) && @preg_match('/\p{L}/u', 'a') !== false) ? true : false; | ||||
| 	} | ||||
|  | ||||
| 	if ($unicode) | ||||
| 	{ | ||||
| 		// Escape the word for use in a regex, then unescape the asterisk to simplify further conversions | ||||
| 		$word = str_replace('\*', '*', preg_quote($word, '#')); | ||||
|  | ||||
| 		// Replace asterisk(s) inside the pattern, at the start and at the end of it with regexes | ||||
| 		$word = preg_replace(array('#(?<=[\p{Nd}\p{L}_])\*+(?=[\p{Nd}\p{L}_])#iu', '#^\*+#', '#\*+$#'), array('([\x20]*?|[\p{Nd}\p{L}_-]*?)', '[\p{Nd}\p{L}_-]*?', '[\p{Nd}\p{L}_-]*?'), $word); | ||||
|  | ||||
| 		// Generate the final substitution; the lookarounds keep the match from starting or ending inside a longer word | ||||
| 		$preg_expr = '#(?<![\p{Nd}\p{L}_-])(' . $word . ')(?![\p{Nd}\p{L}_-])#iu'; | ||||
| 	} | ||||
| 	else | ||||
| 	{ | ||||
| 		$preg_expr = '#(?<!\S)(' . str_replace('\*', '\S*?', preg_quote($word, '#')) . ')(?!\S)#iu'; | ||||
| 	} | ||||
|  | ||||
| 	return $preg_expr; | ||||
| } | ||||
|  | ||||
| /** | ||||
| * Returns the first block of the specified IPv6 address and as many additional | ||||
| * ones as specified in the length parameter. | ||||
|   | ||||
							
								
								
									
										40
									
								
								tests/regex/censor.php
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										40
									
								
								tests/regex/censor.php
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,40 @@ | ||||
| <?php | ||||
| /** | ||||
| * | ||||
| * @package testing | ||||
| * @copyright (c) 2010 phpBB Group | ||||
| * @license http://opensource.org/licenses/gpl-license.php GNU Public License | ||||
| * | ||||
| */ | ||||
|  | ||||
| require_once __DIR__ . '/../../phpBB/includes/functions.php'; | ||||
|  | ||||
| class phpbb_regex_censor_test extends phpbb_test_case | ||||
| { | ||||
| 	public function censor_test_data() | ||||
| 	{ | ||||
| 		return array( | ||||
| 			array('bad*word', 'bad word'), | ||||
| 			array('bad***word', 'bad word'), | ||||
| 			array('bad**word', 'bad word'), | ||||
| 			array('*bad*word*', 'bad word'), | ||||
| 			array('b*d', 'bad'), | ||||
| 			array('*bad*', 'bad'), | ||||
| 			array('*b*d*', 'bad'), | ||||
| 			array('*b*d*', 'b d'), | ||||
| 			array('b*d*word', 'bad word'), | ||||
| 			array('**b**d**word**', 'bad word'), | ||||
| 			array('**b**d**word**', 'the bad word catched'), | ||||
| 		); | ||||
| 	} | ||||
|  | ||||
| 	/** | ||||
| 	* Asserts that the regex generated for a censor pattern matches the subject. | ||||
| 	* | ||||
| 	* Each data set returned by censor_test_data() is a (pattern, subject) pair. | ||||
| 	* The patterns cover single and repeated asterisks at the start, in the | ||||
| 	* middle and at the end of the word. | ||||
| 	* | ||||
| 	* @dataProvider censor_test_data | ||||
| 	* | ||||
| 	* @param string	$pattern	censor word pattern passed to get_censor_preg_expression() | ||||
| 	* @param string	$subject	text the generated regex is expected to match | ||||
| 	*/ | ||||
| 	public function test_censor($pattern, $subject) | ||||
| 	{ | ||||
| 		$regex = get_censor_preg_expression($pattern); | ||||
|  | ||||
| 		$this->assertRegExp($regex, $subject); | ||||
| 	} | ||||
| } | ||||
		Reference in New Issue
	
	Block a user