HTML API: Add support for a few invalid HTML comment forms.

- Comments created by means of a tag closer with an invalid tag name, e.g. `</3>`.
- Comments closed with the invalid `--!>` closer. (Comments should be closed by `-->`, but `--!>` also closes them, which is a parse error.)
- Tag closers with an empty tag name, e.g. `</>`, which are technically skipped over and aren't comments.

Props dmsnell, costdev.
Merges [55667] to the 6.2 branch.
Fixes #58007.

git-svn-id: https://develop.svn.wordpress.org/branches/6.2@55668 602fd350-edb4-49c9-b593-d223f7449a82
This commit is contained in:
Bernie Reiter 2023-04-20 17:15:40 +00:00
parent 80ec1e60fb
commit a3a519aff2
2 changed files with 195 additions and 5 deletions

View File

@ -971,6 +971,7 @@ class WP_HTML_Tag_Processor {
* closing `>`; these are left for other methods.
*
* @since 6.2.0
* @since 6.2.1 Support abruptly-closed comments, invalid-tag-closer-comments, and empty elements.
*
* @return bool Whether a tag was found before the end of the document.
*/
@ -1039,13 +1040,42 @@ class WP_HTML_Tag_Processor {
'-' === $html[ $at + 2 ] &&
'-' === $html[ $at + 3 ]
) {
$closer_at = strpos( $html, '-->', $at + 4 );
if ( false === $closer_at ) {
$closer_at = $at + 4;
// If it's not possible to close the comment then there is nothing more to scan.
if ( strlen( $html ) <= $closer_at ) {
return false;
}
$at = $closer_at + 3;
continue;
// Abruptly-closed empty comments are a sequence of dashes followed by `>`.
$span_of_dashes = strspn( $html, '-', $closer_at );
if ( '>' === $html[ $closer_at + $span_of_dashes ] ) {
$at = $closer_at + $span_of_dashes + 1;
continue;
}
/*
* Comments may be closed by either a --> or an invalid --!>.
* The first occurrence closes the comment.
*
* See https://html.spec.whatwg.org/#parse-error-incorrectly-closed-comment
*/
$closer_at--; // Pre-increment inside condition below reduces risk of accidental infinite looping.
while ( ++$closer_at < strlen( $html ) ) {
$closer_at = strpos( $html, '--', $closer_at );
if ( false === $closer_at ) {
return false;
}
if ( $closer_at + 2 < strlen( $html ) && '>' === $html[ $closer_at + 2 ] ) {
$at = $closer_at + 3;
continue 2;
}
if ( $closer_at + 3 < strlen( $html ) && '!' === $html[ $closer_at + 2 ] && '>' === $html[ $closer_at + 3 ] ) {
$at = $closer_at + 4;
continue 2;
}
}
}
/*
@ -1104,9 +1134,19 @@ class WP_HTML_Tag_Processor {
continue;
}
/*
* </> is a missing end tag name, which is ignored.
*
* See https://html.spec.whatwg.org/#parse-error-missing-end-tag-name
*/
if ( '>' === $html[ $at + 1 ] ) {
$at++;
continue;
}
/*
* <? transitions to a bogus comment state skip to the nearest >
* https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
* See https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
*/
if ( '?' === $html[ $at + 1 ] ) {
$closer_at = strpos( $html, '>', $at + 2 );
@ -1118,6 +1158,22 @@ class WP_HTML_Tag_Processor {
continue;
}
/*
* If a non-alpha starts the tag name in a tag closer it's a comment.
* Find the first `>`, which closes the comment.
*
* See https://html.spec.whatwg.org/#parse-error-invalid-first-character-of-tag-name
*/
if ( $this->is_closing_tag ) {
$closer_at = strpos( $html, '>', $at + 3 );
if ( false === $closer_at ) {
return false;
}
$at = $closer_at + 1;
continue;
}
++$at;
}

View File

@ -1682,6 +1682,47 @@ HTML;
);
}
/**
 * Ensures that markup with an invalid first tag-name character is skipped over.
 *
 * Each dataset marks the element before the invalid markup with the "start" class
 * and the first element expected after it with the "end" class, so a single scan
 * forward from "start" must land on "end".
 *
 * @ticket 58007
 *
 * @link https://html.spec.whatwg.org/#parse-error-invalid-first-character-of-tag-name
 *
 * @covers WP_HTML_Tag_Processor::next_tag
 *
 * @dataProvider data_next_tag_ignores_invalid_first_character_of_tag_name_comments
 *
 * @param string $html_with_markers HTML containing an invalid tag closer whose element before and
 *                                  element after contain the "start" and "end" CSS classes.
 */
public function test_next_tag_ignores_invalid_first_character_of_tag_name_comments( $html_with_markers ) {
	$p = new WP_HTML_Tag_Processor( $html_with_markers );

	/*
	 * Check each scan explicitly: without these assertions a missing "start" element
	 * or a scan that runs off the end of the document would only surface as an
	 * unhelpful mismatch between 'end' and null in the final assertion.
	 */
	$this->assertTrue(
		$p->next_tag( array( 'class_name' => 'start' ) ),
		'Did not find the element with the "start" class.'
	);
	$this->assertTrue(
		$p->next_tag(),
		'Did not find any tag after the element with the "start" class.'
	);

	$this->assertSame(
		'end',
		$p->get_attribute( 'class' ),
		'Did not skip over the invalid markup to reach the element with the "end" class.'
	);
}
/**
 * Data provider.
 *
 * Every dataset holds a single HTML string in which the element carrying the
 * "start" class precedes the invalid markup and the element carrying the "end"
 * class is the next tag that should be found after it.
 *
 * @return array[]
 */
public function data_next_tag_ignores_invalid_first_character_of_tag_name_comments() {
	$datasets = array();

	// `<3` does not open a tag, so it is plain text.
	$datasets['Invalid tag openers as normal text'] = array(
		'<ul><li><div class=start>I <3 when outflow > inflow</div><img class=end></li></ul>',
	);

	// `</3` starts a comment that runs to the nearest `>`, which swallows the `<img>`.
	$datasets['Invalid tag closers as comments'] = array(
		'<ul><li><div class=start>I </3 when <img> outflow <br class=end> inflow</div></li></ul>',
	);

	// `<?` starts a bogus comment that runs to the nearest `>`.
	$datasets['Unexpected question mark instead of tag name'] = array(
		'<div class=start><?xml-stylesheet type="text/css" href="style.css"?><hr class=end>',
	);

	return $datasets;
}
/**
* @ticket 56299
*
@ -1734,6 +1775,99 @@ HTML;
);
}
/**
 * Ensures that the invalid comment closing syntax "--!>" properly closes a comment.
 *
 * The document is walked tag by tag; the element inside the comment must never be
 * visited, and the stray "-->" after the comment must be passed over.
 *
 * @ticket 58007
 *
 * @covers WP_HTML_Tag_Processor::next_tag
 */
public function test_allows_incorrectly_closed_comments() {
	$processor = new WP_HTML_Tag_Processor( '<img id=before><!-- <img id=inside> --!><img id=after>--><img id=final>' );

	// Expected `id` of each successive tag, mapped to the message shown when it is not found.
	$expected_sequence = array(
		'before' => 'Did not find starting tag.',
		'after'  => 'Did not properly close improperly-closed comment.',
		'final'  => 'Did not skip over unopened comment-closer.',
	);

	foreach ( $expected_sequence as $expected_id => $failure_message ) {
		$processor->next_tag();
		$this->assertSame( $expected_id, $processor->get_attribute( 'id' ), $failure_message );
	}
}
/**
 * Ensures that unclosed and invalid comments don't trigger warnings or errors.
 *
 * Scanning a document that ends inside a comment must simply report that no tag was found.
 *
 * @ticket 58007
 *
 * @covers WP_HTML_Tag_Processor::next_tag
 *
 * @dataProvider data_html_with_unclosed_comments
 *
 * @param string $html_ending_before_comment_close HTML with opened comments that aren't closed.
 */
public function test_documents_may_end_with_unclosed_comment( $html_ending_before_comment_close ) {
	$processor = new WP_HTML_Tag_Processor( $html_ending_before_comment_close );
	$found_tag = $processor->next_tag();

	$this->assertFalse( $found_tag );
}
/**
 * Data provider.
 *
 * Every dataset holds a single HTML string that opens a comment and ends before
 * the comment is closed.
 *
 * @return array[]
 */
public function data_html_with_unclosed_comments() {
	$datasets = array();

	// Comments opened with `<!--`.
	$datasets['Shortest open valid comment']      = array( '<!--' );
	$datasets['Basic truncated comment']          = array( '<!-- this ends --' );
	$datasets['Comment with closer look-alike']   = array( '<!-- this ends --x' );
	$datasets['Comment with closer look-alike 2'] = array( '<!-- this ends --!x' );

	// Comments opened by a tag closer whose name starts with an invalid character.
	$datasets['Invalid tag-closer comment']   = array( '</(when will this madness end?)' );
	$datasets['Invalid tag-closer comment 2'] = array( '</(when will this madness end?)--' );

	return $datasets;
}
/**
 * Ensures that abruptly-closed empty comments are properly closed.
 *
 * Every dataset opens with an `<hr>` followed by a comment, so the second tag
 * found must be the element carrying `id=after`.
 *
 * @ticket 58007
 *
 * @covers WP_HTML_Tag_Processor::next_tag
 *
 * @dataProvider data_abruptly_closed_empty_comments
 *
 * @param string $html_with_after_marker HTML to test with "id=after" on element immediately following an abruptly closed comment.
 */
public function test_closes_abrupt_closing_of_empty_comment( $html_with_after_marker ) {
	$p = new WP_HTML_Tag_Processor( $html_with_after_marker );

	/*
	 * Check each scan explicitly: if either one fails to find a tag, the failure is
	 * reported here rather than as a mismatch between 'after' and null below.
	 */
	$this->assertTrue( $p->next_tag(), 'Did not find the tag preceding the comment.' );
	$this->assertTrue( $p->next_tag(), 'Did not find any tag following the comment.' );

	$this->assertSame( 'after', $p->get_attribute( 'id' ), 'Did not find tag after closing abruptly-closed comment' );
}
/**
 * Data provider.
 *
 * Every dataset holds a single HTML string made of an `<hr>`, a comment, and
 * further `<hr>` elements. The element carrying `id=after` is the first tag
 * expected once the comment has been closed.
 *
 * @return array[]
 */
public function data_abruptly_closed_empty_comments() {
	$datasets = array();

	// Comment opener followed immediately by `>` or `!>`.
	$datasets['Empty comment with two dashes only']                    = array( '<hr><!--><hr id=after>' );
	$datasets['Empty comment with two dashes only, improperly closed'] = array( '<hr><!--!><hr id=inside>--><hr id=after>' );
	$datasets['Comment with two dashes only, improperly closed twice'] = array( '<hr><!--!><hr id=inside>--!><hr id=after>' );

	// Comment opener followed by one more dash.
	$datasets['Empty comment with three dashes']                    = array( '<hr><!---><hr id=after>' );
	$datasets['Empty comment with three dashes, improperly closed'] = array( '<hr><!---!><hr id=inside>--><hr id=after>' );
	$datasets['Comment with three dashes, improperly closed twice'] = array( '<hr><!---!><hr id=inside>--!><hr id=after>' );

	// Comment opener followed by two more dashes; here the first `!>` is expected to close the comment.
	$datasets['Empty comment with four dashes']                    = array( '<hr><!----><hr id=after>' );
	$datasets['Empty comment with four dashes, improperly closed'] = array( '<hr><!----!><hr id=after>--><hr id=final>' );
	$datasets['Comment with four dashes, improperly closed twice'] = array( '<hr><!----!><hr id=after>--!><hr id=final>' );

	// Dashes separated from the opener by a space.
	$datasets['Comment with almost-closer inside'] = array( '<hr><!-- ---!><hr id=after>--!><hr id=final>' );

	return $datasets;
}
/**
* @ticket 56299
*