HTML API: Switch to HTML namespace when entering Integration Points.

When encountering inline SVG and MathML content in an HTML document, there are certain "integration points" which transition back into the HTML parsing ruleset. Previously, the HTML API was incorrectly switching into the namespace of the element transitioning into that ruleset.

In this patch, the correct transition is made, where all integration points refer to HTML rules, while non-integration points refer to the rules of the namespace corresponding to the token itself.

Developed in https://github.com/wordpress/wordpress-develop/pull/7425
Discussed in https://core.trac.wordpress.org/ticket/61576

Props dmsnell, jonsurrell.
See #61576.


git-svn-id: https://develop.svn.wordpress.org/trunk@59099 602fd350-edb4-49c9-b593-d223f7449a82
This commit is contained in:
Dennis Snell 2024-09-27 00:42:47 +00:00
parent bbec266c74
commit 1e21ecedf1
2 changed files with 126 additions and 6 deletions
src/wp-includes/html-api
tests/phpunit/tests/html-api

@ -393,7 +393,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
$provenance = ( ! $same_node || $is_virtual ) ? 'virtual' : 'real';
$this->element_queue[] = new WP_HTML_Stack_Event( $token, WP_HTML_Stack_Event::PUSH, $provenance );
$this->change_parsing_namespace( $token->namespace );
$this->change_parsing_namespace( $token->integration_node_type ? 'html' : $token->namespace );
}
);
@ -403,12 +403,14 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
$same_node = isset( $this->state->current_token ) && $token->node_name === $this->state->current_token->node_name;
$provenance = ( ! $same_node || $is_virtual ) ? 'virtual' : 'real';
$this->element_queue[] = new WP_HTML_Stack_Event( $token, WP_HTML_Stack_Event::POP, $provenance );
$adjusted_current_node = $this->get_adjusted_current_node();
$this->change_parsing_namespace(
$adjusted_current_node
? $adjusted_current_node->namespace
: 'html'
);
if ( $adjusted_current_node ) {
$this->change_parsing_namespace( $adjusted_current_node->integration_node_type ? 'html' : $adjusted_current_node->namespace );
} else {
$this->change_parsing_namespace( 'html' );
}
}
);

@ -745,4 +745,122 @@ class Tests_HtmlApi_WpHtmlProcessor extends WP_UnitTestCase {
$class_list
);
}
/**
 * Verifies that elements found inside an SVG HTML integration point
 * are reported in the HTML namespace, while the integration point
 * element itself remains in the SVG namespace.
 *
 * @ticket 61576
 */
public function test_adjusts_for_html_integration_points_in_svg() {
	$html_processor = WP_HTML_Processor::create_full_parser(
		'<svg><foreignobject><image /><svg /><image />'
	);

	// The foreignObject element itself is still reported as an SVG element.
	$found_foreign_object = $html_processor->next_tag( 'foreignObject' );
	$this->assertTrue(
		$found_foreign_object,
		'Failed to find "foreignObject" under test: check test setup.'
	);
	$this->assertSame(
		'svg',
		$html_processor->get_namespace(),
		'Found the wrong namespace for the "foreignObject" element.'
	);

	/*
	 * Because `foreignObject` is an HTML integration point, each IMAGE tag
	 * should be handled according to HTML processing rules and transformed
	 * into an IMG tag.
	 *
	 *  - First pass: the processor is entering the HTML integration point.
	 *  - Second pass: the processor has entered the nested SVG element and
	 *    is returning to the HTML integration point.
	 */
	for ( $pass = 0; $pass < 2; $pass++ ) {
		$this->assertTrue(
			$html_processor->next_tag( 'IMG' ),
			'Failed to find expected "IMG" tag from "<IMAGE>" source tag.'
		);
		$this->assertSame(
			'html',
			$html_processor->get_namespace(),
			'Found the wrong namespace for the transformed "IMAGE"/"IMG" element.'
		);
	}
}
/**
 * Ensures that the processor correctly adjusts the namespace
 * for elements inside MathML integration points.
 *
 * Three "<image />" tags are examined in turn:
 *
 *  1. Inside an MO element outside of any MATH element: expected to be
 *     reported as an IMG element in the HTML namespace.
 *  2. Directly inside the MATH element: expected to remain an IMAGE
 *     element in the MathML namespace.
 *  3. Inside an MO element within MATH: expected to be reported as an
 *     IMG element in the HTML namespace.
 *
 * @ticket 61576
 */
public function test_adjusts_for_mathml_integration_points() {
	$processor = WP_HTML_Processor::create_fragment(
		'<mo><image /></mo><math><image /><mo><image /></mo></math>'
	);

	// Advance token-by-token to ensure matching the right raw "<image />" token.
	$processor->next_token(); // Advance past the +MO.
	$processor->next_token(); // Advance into the +IMG.
	$this->assertSame(
		'IMG',
		$processor->get_tag(),
		'Failed to find expected "IMG" tag from "<IMAGE>" source tag.'
	);
	$this->assertSame(
		'html',
		$processor->get_namespace(),
		'Found the wrong namespace for the transformed "IMAGE"/"IMG" element.'
	);

	// Advance token-by-token to ensure matching the right raw "<image />" token.
	$processor->next_token(); // Advance past the -MO.
	$processor->next_token(); // Advance past the +MATH.
	$processor->next_token(); // Advance into the +IMAGE.
	$this->assertSame(
		'IMAGE',
		$processor->get_tag(),
		'Failed to find the un-transformed "<image />" tag.'
	);

	// Directly inside MATH the tag is not transformed, so the message must not call it transformed.
	$this->assertSame(
		'math',
		$processor->get_namespace(),
		'Found the wrong namespace for the un-transformed "IMAGE" element.'
	);

	// Advance token-by-token to ensure matching the right raw "<image />" token.
	$processor->next_token(); // Advance past the +MO.
	$processor->next_token(); // Advance into the +IMG.
	$this->assertSame(
		'IMG',
		$processor->get_tag(),
		'Failed to find expected "IMG" tag from "<IMAGE>" source tag.'
	);
	$this->assertSame(
		'html',
		$processor->get_namespace(),
		'Found the wrong namespace for the transformed "IMAGE"/"IMG" element.'
	);
}
}