HTML API: Add support for BR, EMBED, & other tags.

Adds support for the following HTML elements to the HTML Processor:

 - AREA, BR, EMBED, KEYGEN, WBR
 - Only the opening BR tag is supported, as the invalid closer `</br>`
   involves more complicated rules, to be implemented later.

Previously, these elements were not supported and the HTML Processor
would bail when encountering them. With this patch it will proceed to
parse an HTML document when encountering those tags as long as other
normal conditions don't cause it to bail (such as complicated format
reconstruction rules).

Props jonsurrell, dmsnell
Fixes #60283



git-svn-id: https://develop.svn.wordpress.org/trunk@57316 602fd350-edb4-49c9-b593-d223f7449a82
This commit is contained in:
Dennis Snell 2024-01-19 21:40:01 +00:00
parent 5815624ead
commit 91e51f92a8
4 changed files with 127 additions and 19 deletions

View File

@ -102,17 +102,17 @@
* - Containers: ADDRESS, BLOCKQUOTE, DETAILS, DIALOG, DIV, FOOTER, HEADER, MAIN, MENU, SPAN, SUMMARY.
* - Custom elements: All custom elements are supported. :)
* - Form elements: BUTTON, DATALIST, FIELDSET, LABEL, LEGEND, METER, PROGRESS, SEARCH.
* - Formatting elements: B, BIG, CODE, EM, FONT, I, SMALL, STRIKE, STRONG, TT, U.
* - Formatting elements: B, BIG, CODE, EM, FONT, I, SMALL, STRIKE, STRONG, TT, U, WBR.
* - Heading elements: H1, H2, H3, H4, H5, H6, HGROUP.
* - Links: A.
* - Lists: DD, DL, DT, LI, OL, UL.
* - Media elements: AUDIO, CANVAS, FIGCAPTION, FIGURE, IMG, MAP, PICTURE, VIDEO.
* - Paragraph: P.
* - Phrasing elements: ABBR, BDI, BDO, CITE, DATA, DEL, DFN, INS, MARK, OUTPUT, Q, SAMP, SUB, SUP, TIME, VAR.
* - Media elements: AUDIO, CANVAS, EMBED, FIGCAPTION, FIGURE, IMG, MAP, PICTURE, VIDEO.
* - Paragraph: BR, P.
* - Phrasing elements: AREA, ABBR, BDI, BDO, CITE, DATA, DEL, DFN, INS, MARK, OUTPUT, Q, SAMP, SUB, SUP, TIME, VAR.
* - Sectioning elements: ARTICLE, ASIDE, HR, NAV, SECTION.
* - Templating elements: SLOT.
* - Text decoration: RUBY.
* - Deprecated elements: ACRONYM, BLINK, CENTER, DIR, ISINDEX, MULTICOL, NEXTID, SPACER.
* - Deprecated elements: ACRONYM, BLINK, CENTER, DIR, ISINDEX, KEYGEN, MULTICOL, NEXTID, SPACER.
*
* ### Supported markup
*
@ -934,12 +934,28 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
$this->run_adoption_agency_algorithm();
return true;
/*
* > An end tag whose tag name is "br"
* > Parse error. Drop the attributes from the token, and act as described in the next
* > entry; i.e. act as if this was a "br" start tag token with no attributes, rather
* > than the end tag token that it actually is.
*/
case '-BR':
$this->last_error = self::ERROR_UNSUPPORTED;
throw new WP_HTML_Unsupported_Exception( 'Closing BR tags require unimplemented special handling.' );
/*
* > A start tag whose tag name is one of: "area", "br", "embed", "img", "keygen", "wbr"
*/
case '+AREA':
case '+BR':
case '+EMBED':
case '+IMG':
case '+KEYGEN':
case '+WBR':
$this->reconstruct_active_formatting_elements();
$this->insert_html_element( $this->state->current_token );
$this->state->frameset_ok = false;
return true;
/*
@ -977,13 +993,11 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
case 'BASEFONT':
case 'BGSOUND':
case 'BODY':
case 'BR':
case 'CAPTION':
case 'COL':
case 'COLGROUP':
case 'DD':
case 'DT':
case 'EMBED':
case 'FORM':
case 'FRAME':
case 'FRAMESET':
@ -991,7 +1005,6 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
case 'HTML':
case 'IFRAME':
case 'INPUT':
case 'KEYGEN':
case 'LI':
case 'LINK':
case 'LISTING':
@ -1031,7 +1044,6 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
case 'TR':
case 'TRACK':
case 'UL':
case 'WBR':
case 'XMP':
$this->last_error = self::ERROR_UNSUPPORTED;
throw new WP_HTML_Unsupported_Exception( "Cannot process {$tag_name} element." );
@ -1692,6 +1704,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
'IMG' === $tag_name ||
'INPUT' === $tag_name ||
'LINK' === $tag_name ||
'KEYGEN' === $tag_name || // Obsolete but still treated as void.
'META' === $tag_name ||
'SOURCE' === $tag_name ||
'TRACK' === $tag_name ||

View File

@ -132,6 +132,86 @@ class Tests_HtmlApi_WpHtmlProcessor extends WP_UnitTestCase {
$this->assertFalse( $p->next_tag( 'EM' ), 'Should have aborted before finding second EM as it required reconstructing the first EM.' );
}
/**
* Ensure non-nesting tags do not nest.
*
* @ticket 60283
*
* @covers WP_HTML_Processor::step_in_body
* @covers WP_HTML_Processor::is_void
*
* @dataProvider data_void_tags
*
* @param string $tag_name Name of void tag under test.
*/
public function test_cannot_nest_void_tags( $tag_name ) {
	/*
	 * The fragment under test opens the void tag and then a DIV. Taking
	 * `<img>` as the example tag, the expected document structure is:
	 *
	 *     <html>
	 *         <body>
	 *             <img>
	 *             <div></div>
	 *         </body>
	 *     </html>
	 *
	 * That is, the DIV must follow the void element as a sibling
	 * instead of being nested inside of it.
	 */
	$p = WP_HTML_Processor::create_fragment( "<{$tag_name}><div>" );

	// Advance first, then inspect the error state: unsupported tags are skipped, not failed.
	$matched_void_tag = $p->next_tag();
	$last_error       = $p->get_last_error();

	if ( WP_HTML_Processor::ERROR_UNSUPPORTED === $last_error ) {
		$this->markTestSkipped( "Tag {$tag_name} is not supported." );
	}

	$this->assertTrue( $matched_void_tag, "Could not find first {$tag_name}." );

	// The void element sits directly under BODY.
	$void_tag_path = array( 'HTML', 'BODY', $tag_name );
	$this->assertSame(
		$void_tag_path,
		$p->get_breadcrumbs(),
		'Found incorrect nesting of first element.'
	);

	$matched_div = $p->next_tag();
	$this->assertTrue( $matched_div, 'Should have found the DIV as the second tag.' );

	// The void element must not appear in the DIV's breadcrumbs.
	$div_path = array( 'HTML', 'BODY', 'DIV' );
	$this->assertSame(
		$div_path,
		$p->get_breadcrumbs(),
		"DIV should have been a sibling of the {$tag_name}."
	);
}
/**
* Data provider.
*
* @return array[]
*/
public function data_void_tags() {
	// Tag names exercised by the void-tag nesting test, in dataset order.
	$void_tag_names = array(
		'AREA',
		'BASE',
		'BR',
		'COL',
		'EMBED',
		'HR',
		'IMG',
		'INPUT',
		'KEYGEN',
		'LINK',
		'META',
		'SOURCE',
		'TRACK',
		'WBR',
	);

	// Each dataset is keyed by its tag name and passes that name as the only argument.
	$datasets = array();
	foreach ( $void_tag_names as $void_tag_name ) {
		$datasets[ $void_tag_name ] = array( $void_tag_name );
	}

	return $datasets;
}
/**
* Ensures that special handling of unsupported tags is cleaned up
* as handling is implemented. Otherwise there's risk of leaving special
@ -159,16 +239,13 @@ class Tests_HtmlApi_WpHtmlProcessor extends WP_UnitTestCase {
public function data_unsupported_special_in_body_tags() {
return array(
'APPLET' => array( 'APPLET' ),
'AREA' => array( 'AREA' ),
'BASE' => array( 'BASE' ),
'BASEFONT' => array( 'BASEFONT' ),
'BGSOUND' => array( 'BGSOUND' ),
'BODY' => array( 'BODY' ),
'BR' => array( 'BR' ),
'CAPTION' => array( 'CAPTION' ),
'COL' => array( 'COL' ),
'COLGROUP' => array( 'COLGROUP' ),
'EMBED' => array( 'EMBED' ),
'FORM' => array( 'FORM' ),
'FRAME' => array( 'FRAME' ),
'FRAMESET' => array( 'FRAMESET' ),
@ -176,7 +253,6 @@ class Tests_HtmlApi_WpHtmlProcessor extends WP_UnitTestCase {
'HTML' => array( 'HTML' ),
'IFRAME' => array( 'IFRAME' ),
'INPUT' => array( 'INPUT' ),
'KEYGEN' => array( 'KEYGEN' ),
'LINK' => array( 'LINK' ),
'LISTING' => array( 'LISTING' ),
'MARQUEE' => array( 'MARQUEE' ),
@ -213,7 +289,6 @@ class Tests_HtmlApi_WpHtmlProcessor extends WP_UnitTestCase {
'TITLE' => array( 'TITLE' ),
'TR' => array( 'TR' ),
'TRACK' => array( 'TRACK' ),
'WBR' => array( 'WBR' ),
'XMP' => array( 'XMP' ),
);
}

View File

@ -162,15 +162,12 @@ class Tests_HtmlApi_WpHtmlProcessorBreadcrumbs extends WP_UnitTestCase {
public function data_unsupported_elements() {
$unsupported_elements = array(
'APPLET', // Deprecated.
'AREA',
'BASE',
'BGSOUND', // Deprecated; self-closing if self-closing flag provided, otherwise normal.
'BODY',
'BR',
'CAPTION',
'COL',
'COLGROUP',
'EMBED',
'FORM',
'FRAME',
'FRAMESET',
@ -178,7 +175,6 @@ class Tests_HtmlApi_WpHtmlProcessorBreadcrumbs extends WP_UnitTestCase {
'HTML',
'IFRAME',
'INPUT',
'KEYGEN', // Deprecated; void.
'LINK',
'LISTING', // Deprecated, use PRE instead.
'MARQUEE', // Deprecated.
@ -213,7 +209,6 @@ class Tests_HtmlApi_WpHtmlProcessorBreadcrumbs extends WP_UnitTestCase {
'TITLE',
'TR',
'TRACK',
'WBR',
'XMP', // Deprecated, use PRE instead.
);

View File

@ -392,4 +392,29 @@ class Tests_HtmlApi_WpHtmlProcessorSemanticRules extends WP_UnitTestCase {
$this->assertSame( 'DIV', $p->get_tag(), "Expected to find DIV element, but found {$p->get_tag()} instead." );
$this->assertSame( array( 'HTML', 'BODY', 'DIV', 'DIV' ), $p->get_breadcrumbs(), 'Failed to produce expected DOM nesting: SPAN should be closed and DIV should be its sibling.' );
}
/**
* Ensures that support isn't accidentally partially added for the closing BR tag `</br>`.
*
* This tag closer has special rules and support shouldn't be added without implementing full support.
*
* > An end tag whose tag name is "br"
* > Parse error. Drop the attributes from the token, and act as described in the next entry;
* > i.e. act as if this was a "br" start tag token with no attributes, rather than the end
* > tag token that it actually is.
*
* When this handling is implemented, this test should be removed. It's not incorporated
* into the existing unsupported tag behavior test because the opening tag is supported;
* only the closing tag isn't.
*
* @covers WP_HTML_Processor::step_in_body
*
* @ticket 60283
*/
public function test_br_end_tag_unsupported() {
	$processor = WP_HTML_Processor::create_fragment( '</br>' );

	// The lone `</br>` closer must not be reported as a matched tag.
	$matched_tag = $processor->next_tag();
	$this->assertFalse( $matched_tag, 'Found a BR tag that should not be handled.' );

	// The processor must also flag the input as unsupported.
	$reported_error = $processor->get_last_error();
	$this->assertSame( WP_HTML_Processor::ERROR_UNSUPPORTED, $reported_error );
}
}