mirror of
git://develop.git.wordpress.org/
synced 2025-07-14 12:16:21 +02:00
HTML API: Fix extensibility of WP_HTML_Processor::next_token()
.
Break out logic from the `next_token()` method into a private method which may call itself recursively. This allows for subclasses to override the `next_token()` method and be assured that each call to `next_token()` corresponds with the consumption of one single token. This also parallels how `WP_HTML_Tag_Processor::next_token()` wraps a private `base_class_next_token()` method. Props westonruter, jonsurrell. Fixes #62269. git-svn-id: https://develop.svn.wordpress.org/trunk@59285 602fd350-edb4-49c9-b593-d223f7449a82
This commit is contained in:
@ -603,6 +603,22 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Finds the next token in the HTML document.
|
||||||
|
*
|
||||||
|
* This doesn't currently have a way to represent non-tags and doesn't process
|
||||||
|
* semantic rules for text nodes. For access to the raw tokens consider using
|
||||||
|
* WP_HTML_Tag_Processor instead.
|
||||||
|
*
|
||||||
|
* @since 6.5.0 Added for internal support; do not use.
|
||||||
|
* @since 6.7.1 Refactored so subclasses may extend.
|
||||||
|
*
|
||||||
|
* @return bool Whether a token was parsed.
|
||||||
|
*/
|
||||||
|
public function next_token(): bool {
	/*
	 * All of the parsing work lives in the private _next_token() method, which
	 * may call itself recursively. Keeping this public method a plain wrapper
	 * means a subclass overriding next_token() is invoked once per consumed token.
	 */
	$was_token_parsed = $this->_next_token();

	return $was_token_parsed;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Ensures internal accounting is maintained for HTML semantic rules while
|
* Ensures internal accounting is maintained for HTML semantic rules while
|
||||||
* the underlying Tag Processor class is seeking to a bookmark.
|
* the underlying Tag Processor class is seeking to a bookmark.
|
||||||
@ -611,13 +627,13 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
|
|||||||
* semantic rules for text nodes. For access to the raw tokens consider using
|
* semantic rules for text nodes. For access to the raw tokens consider using
|
||||||
* WP_HTML_Tag_Processor instead.
|
* WP_HTML_Tag_Processor instead.
|
||||||
*
|
*
|
||||||
* @since 6.5.0 Added for internal support; do not use.
|
* @since 6.7.1 Added for internal support; do not use.
|
||||||
*
|
*
|
||||||
* @access private
|
* @access private
|
||||||
*
|
*
|
||||||
* @return bool
|
* @return bool
|
||||||
*/
|
*/
|
||||||
public function next_token(): bool {
|
private function _next_token(): bool {
|
||||||
$this->current_element = null;
|
$this->current_element = null;
|
||||||
|
|
||||||
if ( isset( $this->last_error ) ) {
|
if ( isset( $this->last_error ) ) {
|
||||||
@ -635,7 +651,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
|
|||||||
* tokens works in the meantime and isn't obviously wrong.
|
* tokens works in the meantime and isn't obviously wrong.
|
||||||
*/
|
*/
|
||||||
if ( empty( $this->element_queue ) && $this->step() ) {
|
if ( empty( $this->element_queue ) && $this->step() ) {
|
||||||
return $this->next_token();
|
return $this->_next_token();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Process the next event on the queue.
|
// Process the next event on the queue.
|
||||||
@ -646,7 +662,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
return empty( $this->element_queue ) ? false : $this->next_token();
|
return empty( $this->element_queue ) ? false : $this->_next_token();
|
||||||
}
|
}
|
||||||
|
|
||||||
$is_pop = WP_HTML_Stack_Event::POP === $this->current_element->operation;
|
$is_pop = WP_HTML_Stack_Event::POP === $this->current_element->operation;
|
||||||
@ -657,7 +673,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
|
|||||||
* the breadcrumbs.
|
* the breadcrumbs.
|
||||||
*/
|
*/
|
||||||
if ( 'root-node' === $this->current_element->token->bookmark_name ) {
|
if ( 'root-node' === $this->current_element->token->bookmark_name ) {
|
||||||
return $this->next_token();
|
return $this->_next_token();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Adjust the breadcrumbs for this event.
|
// Adjust the breadcrumbs for this event.
|
||||||
@ -669,7 +685,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
|
|||||||
|
|
||||||
// Avoid sending close events for elements which don't expect a closing.
|
// Avoid sending close events for elements which don't expect a closing.
|
||||||
if ( $is_pop && ! $this->expects_closer( $this->current_element->token ) ) {
|
if ( $is_pop && ! $this->expects_closer( $this->current_element->token ) ) {
|
||||||
return $this->next_token();
|
return $this->_next_token();
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
@ -0,0 +1,88 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
/**
 * HTML Processor subclass that tracks an XPath for the current open tag.
 *
 * All bookkeeping happens inside an overridden next_token(), so it is only
 * correct when every call to next_token() corresponds to a single token.
 */
class HTML_XPath_Generating_Processor extends WP_HTML_Processor {

	/**
	 * Number of times each token has been seen, keyed by token name.
	 *
	 * Tags are keyed as "+TAG" for openers and "-TAG" for closers. Every other
	 * token is keyed by get_token_name(), with '' used when there is no name.
	 *
	 * @var array<string, int>
	 */
	public $token_seen_count = array();

	/**
	 * Zero-based depth of the most recently visited opening tag.
	 *
	 * @var int
	 */
	private $previous_depth = 0;

	/**
	 * Open stack indices.
	 *
	 * Holds one entry per depth level, recording the tag name of the most recent
	 * opening tag at that level and the zero-based count of opening tags seen at
	 * that level since the level was last reset.
	 *
	 * @since 6.7.1
	 * @var array<int, array{tag_name: string, index: int}>
	 */
	private $open_stack_indices = array();

	/**
	 * Gets XPath for the current open tag.
	 *
	 * Each tracked level contributes one "/*[N][self::TAG]" step, where N is the
	 * level's one-based index.
	 *
	 * @return string XPath.
	 */
	public function get_xpath(): string {
		$xpath = '';
		foreach ( $this->open_stack_indices as $level ) {
			$xpath .= sprintf( '/*[%d][self::%s]', $level['index'] + 1, $level['tag_name'] );
		}
		return $xpath;
	}

	/**
	 * Gets next token, updating the token counts and the open stack indices.
	 *
	 * The bookkeeping runs on every call regardless of the parent's result, so a
	 * call that matches no token is still counted (under '' when it has no name).
	 *
	 * @return bool Whether next token was matched.
	 */
	public function next_token(): bool {
		$result = parent::next_token();

		// Because HTML starts at depth 1, shift to a zero-based depth.
		$current_depth = $this->get_current_depth() - 1;
		$current_tag   = $this->get_tag();
		$is_tag        = '#tag' === $this->get_token_type();
		$is_opener     = $is_tag && ! $this->is_tag_closer();

		if ( $is_tag ) {
			$token_name = ( $is_opener ? '+' : '-' ) . $current_tag;
		} else {
			/*
			 * Cast so that a missing token name is counted under '' explicitly
			 * instead of relying on null being coerced into an array key.
			 */
			$token_name = (string) $this->get_token_name();
		}

		$this->token_seen_count[ $token_name ] = ( $this->token_seen_count[ $token_name ] ?? 0 ) + 1;

		if ( $is_opener ) {
			// Shallower than the previous opener: forget every deeper level.
			if ( $current_depth < $this->previous_depth ) {
				array_splice(
					$this->open_stack_indices,
					$current_depth + 1
				);
			}

			if ( isset( $this->open_stack_indices[ $current_depth ] ) ) {
				// Another opener at an already-tracked level: advance its index.
				$this->open_stack_indices[ $current_depth ]['tag_name'] = $current_tag;
				++$this->open_stack_indices[ $current_depth ]['index'];
			} else {
				// First opener seen at this level.
				$this->open_stack_indices[ $current_depth ] = array(
					'tag_name' => $current_tag,
					'index'    => 0,
				);
			}

			$this->previous_depth = $current_depth;
		}

		return $result;
	}
}
|
@ -882,4 +882,180 @@ class Tests_HtmlApi_WpHtmlProcessor extends WP_UnitTestCase {
|
|||||||
$this->assertSame( 'FORM', $processor->get_tag() );
|
$this->assertSame( 'FORM', $processor->get_tag() );
|
||||||
$this->assertTrue( $processor->is_tag_closer() );
|
$this->assertTrue( $processor->is_tag_closer() );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
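* Data provider: HTML documents paired with the token counts and XPaths expected from a subclass that extends next_token().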
|
||||||
|
*
|
||||||
|
* @return array[] Named data sets, each with 'html', 'expected_token_counts', and 'expected_xpaths' entries.
|
||||||
|
*/
|
||||||
|
public function data_html_processor_with_extended_next_token() {
|
||||||
|
return array(
|
||||||
|
'single_instance_per_tag' => array(
|
||||||
|
'html' => '
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8">
|
||||||
|
<title>Hello World</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<h1>Hello World!</h1>
|
||||||
|
<img src="example.png">
|
||||||
|
<p>Each tag should occur only once in this document.<!--Closing P tag omitted intentionally.-->
|
||||||
|
<footer>The end.</footer>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
',
|
||||||
|
'expected_token_counts' => array(
|
||||||
|
'+HTML' => 1,
|
||||||
|
'+HEAD' => 1,
|
||||||
|
'#text' => 14,
|
||||||
|
'+META' => 1,
|
||||||
|
'+TITLE' => 1,
|
||||||
|
'-HEAD' => 1,
|
||||||
|
'+BODY' => 1,
|
||||||
|
'+H1' => 1,
|
||||||
|
'-H1' => 1,
|
||||||
|
'+IMG' => 1,
|
||||||
|
'+P' => 1,
|
||||||
|
'#comment' => 1,
|
||||||
|
'-P' => 1,
|
||||||
|
'+FOOTER' => 1,
|
||||||
|
'-FOOTER' => 1,
|
||||||
|
'-BODY' => 1,
|
||||||
|
'-HTML' => 1,
|
||||||
|
'' => 1,
|
||||||
|
),
|
||||||
|
'expected_xpaths' => array(
|
||||||
|
0 => '/*[1][self::HTML]',
|
||||||
|
1 => '/*[1][self::HTML]/*[1][self::HEAD]',
|
||||||
|
2 => '/*[1][self::HTML]/*[1][self::HEAD]/*[1][self::META]',
|
||||||
|
3 => '/*[1][self::HTML]/*[1][self::HEAD]/*[2][self::TITLE]',
|
||||||
|
4 => '/*[1][self::HTML]/*[2][self::BODY]',
|
||||||
|
5 => '/*[1][self::HTML]/*[2][self::BODY]/*[1][self::H1]',
|
||||||
|
6 => '/*[1][self::HTML]/*[2][self::BODY]/*[2][self::IMG]',
|
||||||
|
7 => '/*[1][self::HTML]/*[2][self::BODY]/*[3][self::P]',
|
||||||
|
8 => '/*[1][self::HTML]/*[2][self::BODY]/*[4][self::FOOTER]',
|
||||||
|
),
|
||||||
|
),
|
||||||
|
|
||||||
|
'multiple_tag_instances' => array(
|
||||||
|
'html' => '
|
||||||
|
<html>
|
||||||
|
<body>
|
||||||
|
<h1>Hello World!</h1>
|
||||||
|
<p>First
|
||||||
|
<p>Second
|
||||||
|
<p>Third
|
||||||
|
<ul>
|
||||||
|
<li>1
|
||||||
|
<li>2
|
||||||
|
<li>3
|
||||||
|
</ul>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
',
|
||||||
|
'expected_token_counts' => array(
|
||||||
|
'+HTML' => 1,
|
||||||
|
'+HEAD' => 1,
|
||||||
|
'-HEAD' => 1,
|
||||||
|
'+BODY' => 1,
|
||||||
|
'#text' => 13,
|
||||||
|
'+H1' => 1,
|
||||||
|
'-H1' => 1,
|
||||||
|
'+P' => 3,
|
||||||
|
'-P' => 3,
|
||||||
|
'+UL' => 1,
|
||||||
|
'+LI' => 3,
|
||||||
|
'-LI' => 3,
|
||||||
|
'-UL' => 1,
|
||||||
|
'-BODY' => 1,
|
||||||
|
'-HTML' => 1,
|
||||||
|
'' => 1,
|
||||||
|
),
|
||||||
|
'expected_xpaths' => array(
|
||||||
|
0 => '/*[1][self::HTML]',
|
||||||
|
1 => '/*[1][self::HTML]/*[1][self::HEAD]',
|
||||||
|
2 => '/*[1][self::HTML]/*[2][self::BODY]',
|
||||||
|
3 => '/*[1][self::HTML]/*[2][self::BODY]/*[1][self::H1]',
|
||||||
|
4 => '/*[1][self::HTML]/*[2][self::BODY]/*[2][self::P]',
|
||||||
|
5 => '/*[1][self::HTML]/*[2][self::BODY]/*[3][self::P]',
|
||||||
|
6 => '/*[1][self::HTML]/*[2][self::BODY]/*[4][self::P]',
|
||||||
|
7 => '/*[1][self::HTML]/*[2][self::BODY]/*[5][self::UL]',
|
||||||
|
8 => '/*[1][self::HTML]/*[2][self::BODY]/*[5][self::UL]/*[1][self::LI]',
|
||||||
|
9 => '/*[1][self::HTML]/*[2][self::BODY]/*[5][self::UL]/*[2][self::LI]',
|
||||||
|
10 => '/*[1][self::HTML]/*[2][self::BODY]/*[5][self::UL]/*[3][self::LI]',
|
||||||
|
),
|
||||||
|
),
|
||||||
|
|
||||||
|
'extreme_nested_formatting' => array(
|
||||||
|
'html' => '
|
||||||
|
<html>
|
||||||
|
<body>
|
||||||
|
<p>
|
||||||
|
<strong><em><strike><i><b><u>FORMAT</u></b></i></strike></em></strong>
|
||||||
|
</p>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
',
|
||||||
|
'expected_token_counts' => array(
|
||||||
|
'+HTML' => 1,
|
||||||
|
'+HEAD' => 1,
|
||||||
|
'-HEAD' => 1,
|
||||||
|
'+BODY' => 1,
|
||||||
|
'#text' => 7,
|
||||||
|
'+P' => 1,
|
||||||
|
'+STRONG' => 1,
|
||||||
|
'+EM' => 1,
|
||||||
|
'+STRIKE' => 1,
|
||||||
|
'+I' => 1,
|
||||||
|
'+B' => 1,
|
||||||
|
'+U' => 1,
|
||||||
|
'-U' => 1,
|
||||||
|
'-B' => 1,
|
||||||
|
'-I' => 1,
|
||||||
|
'-STRIKE' => 1,
|
||||||
|
'-EM' => 1,
|
||||||
|
'-STRONG' => 1,
|
||||||
|
'-P' => 1,
|
||||||
|
'-BODY' => 1,
|
||||||
|
'-HTML' => 1,
|
||||||
|
'' => 1,
|
||||||
|
),
|
||||||
|
'expected_xpaths' => array(
|
||||||
|
0 => '/*[1][self::HTML]',
|
||||||
|
1 => '/*[1][self::HTML]/*[1][self::HEAD]',
|
||||||
|
2 => '/*[1][self::HTML]/*[2][self::BODY]',
|
||||||
|
3 => '/*[1][self::HTML]/*[2][self::BODY]/*[1][self::P]',
|
||||||
|
4 => '/*[1][self::HTML]/*[2][self::BODY]/*[1][self::P]/*[1][self::STRONG]',
|
||||||
|
5 => '/*[1][self::HTML]/*[2][self::BODY]/*[1][self::P]/*[1][self::STRONG]/*[1][self::EM]',
|
||||||
|
6 => '/*[1][self::HTML]/*[2][self::BODY]/*[1][self::P]/*[1][self::STRONG]/*[1][self::EM]/*[1][self::STRIKE]',
|
||||||
|
7 => '/*[1][self::HTML]/*[2][self::BODY]/*[1][self::P]/*[1][self::STRONG]/*[1][self::EM]/*[1][self::STRIKE]/*[1][self::I]',
|
||||||
|
8 => '/*[1][self::HTML]/*[2][self::BODY]/*[1][self::P]/*[1][self::STRONG]/*[1][self::EM]/*[1][self::STRIKE]/*[1][self::I]/*[1][self::B]',
|
||||||
|
9 => '/*[1][self::HTML]/*[2][self::BODY]/*[1][self::P]/*[1][self::STRONG]/*[1][self::EM]/*[1][self::STRIKE]/*[1][self::I]/*[1][self::B]/*[1][self::U]',
|
||||||
|
),
|
||||||
|
),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Ensures that subclasses of WP_HTML_Processor can do bookkeeping by extending the next_token() method.
|
||||||
|
*
|
||||||
|
* @ticket 62269
|
||||||
|
* @dataProvider data_html_processor_with_extended_next_token
|
||||||
|
*/
|
||||||
|
public function test_ensure_next_token_method_extensibility( $html, $expected_token_counts, $expected_xpaths ) {
	// The XPath-generating subclass lives with the test data rather than in core.
	require_once DIR_TESTDATA . '/html-api/html-xpath-generating-processor.php';

	$xpath_processor  = HTML_XPath_Generating_Processor::create_full_parser( $html );
	$collected_xpaths = array();

	while ( $xpath_processor->next_tag() ) {
		// Only opening tags are annotated and recorded.
		if ( $xpath_processor->is_tag_closer() ) {
			continue;
		}

		$xpath_processor->set_attribute( 'xpath', $xpath_processor->get_xpath() );
		$collected_xpaths[] = $xpath_processor->get_xpath();
	}

	// The failure messages dump the actual values so they can be compared with the expectations.
	$this->assertEquals(
		$expected_token_counts,
		$xpath_processor->token_seen_count,
		'Snapshot: ' . var_export( $xpath_processor->token_seen_count, true )
	);
	$this->assertEquals(
		$expected_xpaths,
		$collected_xpaths,
		'Snapshot: ' . var_export( $collected_xpaths, true )
	);
}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user