Ensure that `shortcode_unautop()` treats `&nbsp;` like whitespace.

`shortcode_unautop()` and `wptexturize()` now use `wp_spaces_regexp()` instead of raw regex.

Adds unit tests.

Props miqrogroove.
See #27588.


git-svn-id: https://develop.svn.wordpress.org/trunk@28716 602fd350-edb4-49c9-b593-d223f7449a82
This commit is contained in:
Scott Taylor 2014-06-10 01:54:11 +00:00
parent 8ec05a3926
commit 97bacb3121
2 changed files with 37 additions and 11 deletions

View File

@ -82,14 +82,7 @@ function wptexturize($text) {
$static_characters = array_merge( array( '---', ' -- ', '--', ' - ', 'xn–', '...', '``', '\'\'', ' (tm)' ), $cockney );
$static_replacements = array_merge( array( $em_dash, ' ' . $em_dash . ' ', $en_dash, ' ' . $en_dash . ' ', 'xn--', '…', $opening_quote, $closing_quote, ' ™' ), $cockneyreplace );
/*
* Regex for common whitespace characters.
*
* By default, spaces include new lines, tabs, nbsp entities, and the UTF-8 nbsp.
* This is designed to replace the PCRE \s sequence. In #WP22692, that sequence
* was found to be unreliable due to random inclusion of the A0 byte.
*/
$spaces = '[\r\n\t ]|\xC2\xA0|&nbsp;';
$spaces = wp_spaces_regexp();
// Pattern-based replacements of characters.
@ -370,11 +363,12 @@ function shortcode_unautop( $pee ) {
}
$tagregexp = join( '|', array_map( 'preg_quote', array_keys( $shortcode_tags ) ) );
$spaces = wp_spaces_regexp();
$pattern =
'/'
. '<p>' // Opening paragraph
. '\\s*+' // Optional leading whitespace
. '(?:' . $spaces . ')*+' // Optional leading whitespace
. '(' // 1: The shortcode
. '\\[' // Opening bracket
. "($tagregexp)" // 2: Shortcode name
@ -399,7 +393,7 @@ function shortcode_unautop( $pee ) {
. ')?'
. ')'
. ')'
. '\\s*+' // optional trailing whitespace
. '(?:' . $spaces . ')*+' // optional trailing whitespace
. '<\\/p>' // closing paragraph
. '/s';
@ -3857,8 +3851,19 @@ function wp_spaces_regexp() {
static $spaces;
if ( empty( $spaces ) ) {
/**
* Regexp for common whitespace characters.
*
* This string is substituted for the \s sequence as needed in regular expressions.
* For websites not written in English, different characters may represent whitespace.
* For websites not encoded in UTF-8, the 0xC2 0xA0 sequence may not be in use.
*
* @since 4.0.0
*
* @param string $spaces Regexp pattern for matching common whitespace characters.
*/
$spaces = apply_filters( 'wp_spaces_regexp', '[\r\n\t ]|\xC2\xA0|&nbsp;' );
}
return $spaces;
}
}

View File

@ -373,4 +373,25 @@ EOF;
remove_filter( 'shortcode_atts_bartag', array( $this, '_filter_atts2' ), 10, 3 );
}
/**
* Check that shortcode_unautop() will always recognize spaces around shortcodes.
*
* @ticket 22692
*/
function test_spaces_around_shortcodes() {
	// UTF-8 byte sequence for a non-breaking space (U+00A0).
	$nbsp = "\xC2\xA0";

	// The same shortcode wrapped in a paragraph, padded with a different
	// kind of whitespace in each case. Keys label the case so that a
	// failing assertion identifies which input broke.
	$inputs = array(
		'no padding'           => '<p>[gallery ids="37,15,11"]</p>',
		'plain spaces'         => '<p> [gallery ids="37,15,11"] </p>',
		'UTF-8 nbsp character' => "<p> {$nbsp}[gallery ids=\"37,15,11\"] {$nbsp}</p>",
		'nbsp HTML entity'     => '<p> &nbsp;[gallery ids="37,15,11"] &nbsp;</p>',
	);

	// In every case the paragraph wrapper and its padding should be removed,
	// leaving only the shortcode itself.
	$expected = '[gallery ids="37,15,11"]';

	foreach ( $inputs as $label => $input ) {
		// assertSame() performs a strict (type and value) comparison.
		$this->assertSame(
			$expected,
			shortcode_unautop( $input ),
			"shortcode_unautop() did not unwrap the shortcode for case: {$label}"
		);
	}
}
}