From a3a519aff2a4b63179b61ee45331f28c3b9d4abb Mon Sep 17 00:00:00 2001 From: Bernie Reiter Date: Thu, 20 Apr 2023 17:15:40 +0000 Subject: [PATCH] HTML API: Add support for a few invalid HTML comment forms. - Comments created by means of a tag closer with an invalid tag name, e.g. ``. - Comments closed with the invalid `--!>` closer. (Comments should be closed by `-->` but if the `!` appears it will also close it, in error.) - Empty tag name elements, which are technically skipped over and aren't comments, e.g. ``. Props dmsnell, costdev. Merges [55667] to the 6.2 branch. Fixes #58007. git-svn-id: https://develop.svn.wordpress.org/branches/6.2@55668 602fd350-edb4-49c9-b593-d223f7449a82 --- .../html-api/class-wp-html-tag-processor.php | 66 ++++++++- .../tests/html-api/wpHtmlTagProcessor.php | 134 ++++++++++++++++++ 2 files changed, 195 insertions(+), 5 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index 1a7fd6dfa3..a9e4775f14 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -971,6 +971,7 @@ class WP_HTML_Tag_Processor { * closing `>`; these are left for other methods. * * @since 6.2.0 + * @since 6.2.1 Support abruptly-closed comments, invalid-tag-closer-comments, and empty elements. * * @return bool Whether a tag was found before the end of the document. */ @@ -1039,13 +1040,42 @@ class WP_HTML_Tag_Processor { '-' === $html[ $at + 2 ] && '-' === $html[ $at + 3 ] ) { - $closer_at = strpos( $html, '-->', $at + 4 ); - if ( false === $closer_at ) { + $closer_at = $at + 4; + // If it's not possible to close the comment then there is nothing more to scan. + if ( strlen( $html ) <= $closer_at ) { return false; } - $at = $closer_at + 3; - continue; + // Abruptly-closed empty comments are a sequence of dashes followed by `>`. 
+ $span_of_dashes = strspn( $html, '-', $closer_at ); + if ( '>' === $html[ $closer_at + $span_of_dashes ] ) { + $at = $closer_at + $span_of_dashes + 1; + continue; + } + + /* + * Comments may be closed by either a --> or an invalid --!>. + * The first occurrence closes the comment. + * + * See https://html.spec.whatwg.org/#parse-error-incorrectly-closed-comment + */ + $closer_at--; // Pre-increment inside condition below reduces risk of accidental infinite looping. + while ( ++$closer_at < strlen( $html ) ) { + $closer_at = strpos( $html, '--', $closer_at ); + if ( false === $closer_at ) { + return false; + } + + if ( $closer_at + 2 < strlen( $html ) && '>' === $html[ $closer_at + 2 ] ) { + $at = $closer_at + 3; + continue 2; + } + + if ( $closer_at + 3 < strlen( $html ) && '!' === $html[ $closer_at + 2 ] && '>' === $html[ $closer_at + 3 ] ) { + $at = $closer_at + 4; + continue 2; + } + } } /* @@ -1104,9 +1134,19 @@ class WP_HTML_Tag_Processor { continue; } + /* + * </> is a missing end tag name, which is ignored. + * + * See https://html.spec.whatwg.org/#parse-error-missing-end-tag-name + */ + if ( '>' === $html[ $at + 1 ] ) { + $at++; + continue; + } + /* * - * https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state + * See https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state */ if ( '?' === $html[ $at + 1 ] ) { $closer_at = strpos( $html, '>', $at + 2 ); @@ -1118,6 +1158,22 @@ class WP_HTML_Tag_Processor { continue; } + /* + * If a non-alpha starts the tag name in a tag closer it's a comment. + * Find the first `>`, which closes the comment. 
+ * + * See https://html.spec.whatwg.org/#parse-error-invalid-first-character-of-tag-name + */ + if ( $this->is_closing_tag ) { + $closer_at = strpos( $html, '>', $at + 3 ); + if ( false === $closer_at ) { + return false; + } + + $at = $closer_at + 1; + continue; + } + ++$at; } diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php index f5a8c1046f..f218d17f43 100644 --- a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php @@ -1682,6 +1682,47 @@ HTML; ); } + /** + * Invalid tag names are comments on tag closers. + * + * @ticket 58007 + * + * @link https://html.spec.whatwg.org/#parse-error-invalid-first-character-of-tag-name + * + * @dataProvider data_next_tag_ignores_invalid_first_character_of_tag_name_comments + * + * @param string $html_with_markers HTML containing an invalid tag closer whose element before and + * element after contain the "start" and "end" CSS classes. + */ + public function test_next_tag_ignores_invalid_first_character_of_tag_name_comments( $html_with_markers ) { + $p = new WP_HTML_Tag_Processor( $html_with_markers ); + $p->next_tag( array( 'class_name' => 'start' ) ); + $p->next_tag(); + + $this->assertSame( 'end', $p->get_attribute( 'class' ) ); + } + + /** + * Data provider. + * + * @return array[] + */ + public function data_next_tag_ignores_invalid_first_character_of_tag_name_comments() { + return array( + 'Invalid tag openers as normal text' => array( + '', + ), + + 'Invalid tag closers as comments' => array( + '', + ), + + 'Unexpected question mark instead of tag name' => array( + '

', + ), + ); + } + /** * @ticket 56299 * @@ -1734,6 +1775,99 @@ HTML; ); } + /** + * Ensures that the invalid comment closing syntax "--!>" properly closes a comment. + * + * @ticket 58007 + * + * @covers WP_HTML_Tag_Processor::next_tag + * + */ + public function test_allows_incorrectly_closed_comments() { + $p = new WP_HTML_Tag_Processor( '-->' ); + + $p->next_tag(); + $this->assertSame( 'before', $p->get_attribute( 'id' ), 'Did not find starting tag.' ); + + $p->next_tag(); + $this->assertSame( 'after', $p->get_attribute( 'id' ), 'Did not properly close improperly-closed comment.' ); + + $p->next_tag(); + $this->assertSame( 'final', $p->get_attribute( 'id' ), 'Did not skip over unopened comment-closer.' ); + } + + /** + * Ensures that unclosed and invalid comments don't trigger warnings or errors. + * + * @ticket 58007 + * + * @covers WP_HTML_Tag_Processor::next_tag + * + * @dataProvider data_html_with_unclosed_comments + * + * @param string $html_ending_before_comment_close HTML with opened comments that aren't closed + */ + public function test_documents_may_end_with_unclosed_comment( $html_ending_before_comment_close ) { + $p = new WP_HTML_Tag_Processor( $html_ending_before_comment_close ); + + $this->assertFalse( $p->next_tag() ); + } + + /** + * Data provider. + * + * @return array[] + */ + public function data_html_with_unclosed_comments() { + return array( + 'Shortest open valid comment' => array( '
' ), + 'Empty comment with two dashes only, improperly closed' => array( '

' ), + 'Comment with two dashes only, improperly closed twice' => array( '

' ), + 'Empty comment with three dashes' => array( '

' ), + 'Empty comment with three dashes, improperly closed' => array( '

' ), + 'Comment with three dashes, improperly closed twice' => array( '

' ), + 'Empty comment with four dashes' => array( '

' ), + 'Empty comment with four dashes, improperly closed' => array( '

-->
' ), + 'Comment with four dashes, improperly closed twice' => array( '

--!>
' ), + 'Comment with almost-closer inside' => array( '

--!>
' ), + ); + } + /** * @ticket 56299 *