From 6ca5bdc3ac193f3aa196b188baab10ec8857c11e Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Wed, 29 May 2024 11:40:16 +0000 Subject: [PATCH] HTML API: Fix token length bug in Tag Processor. The Tag Processor stores the byte-offsets into its HTML document where the current token starts and ends, and also for every bookmark. In some cases for tags, the end offset has been off by one. In this patch the offset is fixed so that a bookmark always properly refers to the full span of the token it's bookmarking. Also the current token byte offsets are properly recorded. While this is a defect in the Tag Processor, it hasn't been exposed through the public interface and has not affected any of the workings of the processor. Only subclasses which rely on the length of a bookmark have been potentially affected, and these are not supported environments in the ongoing work. This fix is important for future work and for ensuring that subclasses performing custom behaviors remain as reliable as the public interface. Developed in https://github.com/WordPress/wordpress-develop/pull/6625 Discussed in https://core.trac.wordpress.org/ticket/61301 Props dmsnell, gziolo, jonsurrell, westonruter. Fixes #61301. 
git-svn-id: https://develop.svn.wordpress.org/trunk@58233 602fd350-edb4-49c9-b593-d223f7449a82 --- .../html-api/class-wp-html-tag-processor.php | 6 +- ...interactivity-api-directives-processor.php | 4 +- .../tests/html-api/wpHtmlTagProcessor.php | 105 +++++++++++++++++- 3 files changed, 109 insertions(+), 6 deletions(-) diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index 4597a888b5..26d22c072e 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -926,8 +926,8 @@ class WP_HTML_Tag_Processor { return false; } $this->parser_state = self::STATE_MATCHED_TAG; - $this->token_length = $tag_ends_at - $this->token_starts_at; $this->bytes_already_parsed = $tag_ends_at + 1; + $this->token_length = $this->bytes_already_parsed - $this->token_starts_at; /* * For non-DATA sections which might contain text that looks like HTML tags but @@ -1013,7 +1013,7 @@ class WP_HTML_Tag_Processor { */ $this->token_starts_at = $was_at; $this->token_length = $this->bytes_already_parsed - $this->token_starts_at; - $this->text_starts_at = $tag_ends_at + 1; + $this->text_starts_at = $tag_ends_at; $this->text_length = $this->tag_name_starts_at - $this->text_starts_at; $this->tag_name_starts_at = $tag_name_starts_at; $this->tag_name_length = $tag_name_length; @@ -2687,7 +2687,7 @@ class WP_HTML_Tag_Processor { *
* ^ this appears one character before the end of the closing ">". */ - return '/' === $this->html[ $this->token_starts_at + $this->token_length - 1 ]; + return '/' === $this->html[ $this->token_starts_at + $this->token_length - 2 ]; } /** diff --git a/src/wp-includes/interactivity-api/class-wp-interactivity-api-directives-processor.php b/src/wp-includes/interactivity-api/class-wp-interactivity-api-directives-processor.php index 3b2dcb1237..b12dcb4b3b 100644 --- a/src/wp-includes/interactivity-api/class-wp-interactivity-api-directives-processor.php +++ b/src/wp-includes/interactivity-api/class-wp-interactivity-api-directives-processor.php @@ -107,7 +107,7 @@ final class WP_Interactivity_API_Directives_Processor extends WP_HTML_Tag_Proces $bookmark = 'append_content_after_template_tag_closer'; $this->set_bookmark( $bookmark ); - $after_closing_tag = $this->bookmarks[ $bookmark ]->start + $this->bookmarks[ $bookmark ]->length + 1; + $after_closing_tag = $this->bookmarks[ $bookmark ]->start + $this->bookmarks[ $bookmark ]->length; $this->release_bookmark( $bookmark ); // Appends the new content. @@ -140,7 +140,7 @@ final class WP_Interactivity_API_Directives_Processor extends WP_HTML_Tag_Proces } list( $opener_tag, $closer_tag ) = $bookmarks; - $after_opener_tag = $this->bookmarks[ $opener_tag ]->start + $this->bookmarks[ $opener_tag ]->length + 1; + $after_opener_tag = $this->bookmarks[ $opener_tag ]->start + $this->bookmarks[ $opener_tag ]->length; $before_closer_tag = $this->bookmarks[ $closer_tag ]->start; if ( $rewind ) { diff --git a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php index 824630b335..fad1000dd7 100644 --- a/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php +++ b/tests/phpunit/tests/html-api/wpHtmlTagProcessor.php @@ -476,6 +476,109 @@ class Tests_HtmlApi_WpHtmlTagProcessor extends WP_UnitTestCase { $this->assertSame( '
', $processor->get_updated_html() ); } + /** + * Ensures that bookmarks start and length correctly describe a given token in HTML. + * + * @ticket 61301 + * + * @dataProvider data_html_nth_token_substring + * + * @param string $html Input HTML. + * @param int $match_nth_token Which token to inspect from input HTML. + * @param string $expected_match Expected full raw token bookmark should capture. + */ + public function test_token_bookmark_span( string $html, int $match_nth_token, string $expected_match ) { + $processor = new class( $html ) extends WP_HTML_Tag_Processor { + /** + * Returns the raw span of HTML for the currently-matched + * token, or null if not paused on any token. + * + * @return string|null Raw HTML content of currently-matched token, + * otherwise `null` if not matched. + */ + public function get_raw_token() { + if ( + WP_HTML_Tag_Processor::STATE_READY === $this->parser_state || + WP_HTML_Tag_Processor::STATE_INCOMPLETE_INPUT === $this->parser_state || + WP_HTML_Tag_Processor::STATE_COMPLETE === $this->parser_state + ) { + return null; + } + + $this->set_bookmark( 'mark' ); + $mark = $this->bookmarks['mark']; + + return substr( $this->html, $mark->start, $mark->length ); + } + }; + + for ( $i = 0; $i < $match_nth_token; $i++ ) { + $processor->next_token(); + } + + $raw_token = $processor->get_raw_token(); + $this->assertIsString( + $raw_token, + "Failed to find raw token at position {$match_nth_token}: check test data provider." + ); + + $this->assertSame( + $expected_match, + $raw_token, + 'Bookmarked wrong span of text for full matched token.' + ); + } + + /** + * Data provider. + * + * @return array + */ + public static function data_html_nth_token_substring() { + return array( + // Tags. + 'DIV start tag' => array( '
', 1, '
' ), + 'DIV start tag with attributes' => array( '
', 1, '
' ), + 'DIV end tag' => array( '
', 1, '
' ), + 'DIV end tag with attributes' => array( '
', 1, '
' ), + 'Nested DIV' => array( '
', 2, '
' ), + 'Sibling DIV' => array( '
', 3, '
' ), + 'DIV after text' => array( 'text
', 2, '
' ), + 'DIV before text' => array( '
text', 1, '
' ), + 'DIV after comment' => array( '
', 2, '
' ), + 'DIV before comment' => array( '
', 1, '
' ), + 'Start "self-closing" tag' => array( '
', 1, '
' ), + 'Void tag' => array( '', 1, '' ), + 'Void tag w/self-closing flag' => array( '', 1, '' ), + 'Void tag inside DIV' => array( '
', 2, '' ), + + // Special atomic tags. + 'SCRIPT tag' => array( '', 1, '' ), + 'SCRIPT double-escape' => array( '
', 1, '' ), + + // Text. + 'Text' => array( 'Just text', 1, 'Just text' ), + 'Text in DIV' => array( '
Text
', 2, 'Text' ), + 'Text before DIV' => array( 'Text
', 1, 'Text' ), + 'Text after DIV' => array( '
Text', 3, 'Text' ), + 'Text after comment' => array( 'Text', 2, 'Text' ), + 'Text before comment' => array( 'Text ', 1, 'Text' ), + + // Comments. + 'Comment' => array( '', 1, '' ), + 'Comment in DIV' => array( '
', 2, '' ), + 'Comment before DIV' => array( '
', 1, '' ), + 'Comment after DIV' => array( '
', 3, '' ), + 'Comment after comment' => array( '', 2, '' ), + 'Comment before comment' => array( ' ', 1, '' ), + 'Abruptly closed comment' => array( '', 1, '' ), + 'Empty comment' => array( '', 1, '' ), + 'Funky comment' => array( '', 1, '' ), + 'PI lookalike comment' => array( '', 1, '' ), + 'CDATA lookalike comment' => array( '', 1, '' ), + ); + } + /** * @ticket 56299 * @@ -2746,7 +2849,7 @@ HTML public function insert_after( $new_html ) { $this->set_bookmark( 'here' ); $this->lexical_updates[] = new WP_HTML_Text_Replacement( - $this->bookmarks['here']->start + $this->bookmarks['here']->length + 1, + $this->bookmarks['here']->start + $this->bookmarks['here']->length, 0, $new_html );