mirror of
git://develop.git.wordpress.org/
synced 2025-03-24 05:49:49 +01:00
HTML API: Fix extensibility of WP_HTML_Processor::next_token().
Break out logic from the next_token() method into a private method which may call itself recursively. This allows for subclasses to override the next_token() method and be assured that each call to next_token() corresponds with the consumption of one single token. This also parallels how WP_HTML_Tag_Processor::next_token() wraps a private base_class_next_token() method. Reviewed by jonsurrell. Merges [59285], [59364], and [59747] to 6.7 branch. Props westonruter, jonsurrell, dmsnell, jorbin. git-svn-id: https://develop.svn.wordpress.org/branches/6.7@59757 602fd350-edb4-49c9-b593-d223f7449a82
This commit is contained in:
parent
1c02e689d7
commit
2a37cf00a9
@ -607,6 +607,22 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * Finds the next token in the HTML document.
 *
 * This doesn't currently have a way to represent non-tags and doesn't process
 * semantic rules for text nodes. For access to the raw tokens consider using
 * WP_HTML_Tag_Processor instead.
 *
 * The traversal itself is delegated to the private next_visitable_token()
 * method, which may call itself recursively. Keeping this public method as a
 * thin wrapper means a subclass can override it and rely on each call
 * corresponding to the consumption of one single token.
 *
 * @since 6.5.0 Added for internal support; do not use.
 * @since 6.7.2 Refactored so subclasses may extend.
 *
 * @return bool Whether a token was parsed.
 */
public function next_token(): bool {
	$was_token_parsed = $this->next_visitable_token();

	return $was_token_parsed;
}
|
||||
|
||||
/**
|
||||
* Ensures internal accounting is maintained for HTML semantic rules while
|
||||
* the underlying Tag Processor class is seeking to a bookmark.
|
||||
@ -615,13 +631,18 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
|
||||
* semantic rules for text nodes. For access to the raw tokens consider using
|
||||
* WP_HTML_Tag_Processor instead.
|
||||
*
|
||||
* @since 6.5.0 Added for internal support; do not use.
|
||||
* Note that this method may call itself recursively. This is why it is not
|
||||
* implemented as {@see WP_HTML_Processor::next_token()}, which instead calls
|
||||
* this method similarly to how {@see WP_HTML_Tag_Processor::next_token()}
|
||||
* calls the {@see WP_HTML_Tag_Processor::base_class_next_token()} method.
|
||||
*
|
||||
* @since 6.7.2 Added for internal support.
|
||||
*
|
||||
* @access private
|
||||
*
|
||||
* @return bool
|
||||
*/
|
||||
public function next_token(): bool {
|
||||
private function next_visitable_token(): bool {
|
||||
$this->current_element = null;
|
||||
|
||||
if ( isset( $this->last_error ) ) {
|
||||
@ -639,7 +660,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
|
||||
* tokens works in the meantime and isn't obviously wrong.
|
||||
*/
|
||||
if ( empty( $this->element_queue ) && $this->step() ) {
|
||||
return $this->next_token();
|
||||
return $this->next_visitable_token();
|
||||
}
|
||||
|
||||
// Process the next event on the queue.
|
||||
@ -650,7 +671,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
|
||||
continue;
|
||||
}
|
||||
|
||||
return empty( $this->element_queue ) ? false : $this->next_token();
|
||||
return empty( $this->element_queue ) ? false : $this->next_visitable_token();
|
||||
}
|
||||
|
||||
$is_pop = WP_HTML_Stack_Event::POP === $this->current_element->operation;
|
||||
@ -661,7 +682,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
|
||||
* the breadcrumbs.
|
||||
*/
|
||||
if ( 'root-node' === $this->current_element->token->bookmark_name ) {
|
||||
return $this->next_token();
|
||||
return $this->next_visitable_token();
|
||||
}
|
||||
|
||||
// Adjust the breadcrumbs for this event.
|
||||
@ -673,7 +694,7 @@ class WP_HTML_Processor extends WP_HTML_Tag_Processor {
|
||||
|
||||
// Avoid sending close events for elements which don't expect a closing.
|
||||
if ( $is_pop && ! $this->expects_closer( $this->current_element->token ) ) {
|
||||
return $this->next_token();
|
||||
return $this->next_visitable_token();
|
||||
}
|
||||
|
||||
return true;
|
||||
|
@ -0,0 +1,35 @@
|
||||
<?php
|
||||
|
||||
/**
 * Test helper which counts how many times each token is visited.
 *
 * It overrides next_token() to do its bookkeeping, so the counts reflect
 * how many times the overridden method was invoked for each token.
 */
class Token_Counting_HTML_Processor extends WP_HTML_Processor {

	/**
	 * Number of times each token has been seen, keyed by token name.
	 *
	 * Tags are keyed by their tag name prefixed with "+" for openers and "-"
	 * for closers; every other token is keyed by its token name. A call which
	 * has no token name to report is counted under the empty string.
	 *
	 * @var array<string, int>
	 */
	public $token_seen_count = array();

	/**
	 * Gets next token and records it in the seen-token counts.
	 *
	 * The count is updated on every call, including a call whose result
	 * is `false`.
	 *
	 * @return bool Whether next token was matched.
	 */
	public function next_token(): bool {
		$result = parent::next_token();

		if ( '#tag' === $this->get_token_type() ) {
			$token_name = ( $this->is_tag_closer() ? '-' : '+' ) . $this->get_tag();
		} else {
			/*
			 * The token name may be null (e.g. once the document is exhausted).
			 * Using null as an array offset is deprecated as of PHP 8.5, so it
			 * is normalized to the empty string, which is the key null was
			 * implicitly converted into before.
			 */
			$token_name = $this->get_token_name() ?? '';
		}

		$this->token_seen_count[ $token_name ] = ( $this->token_seen_count[ $token_name ] ?? 0 ) + 1;

		return $result;
	}
}
|
@ -908,6 +908,141 @@ class Tests_HtmlApi_WpHtmlProcessor extends WP_UnitTestCase {
|
||||
$this->assertTrue( $processor->is_tag_closer() );
|
||||
}
|
||||
|
||||
/**
 * Data provider.
 *
 * Each dataset pairs an HTML document with the number of times every token
 * is expected to be counted. Count keys are "+TAG" for tag openers, "-TAG"
 * for tag closers, and the token name (e.g. "#text", "#comment") for
 * everything else. The empty-string key presumably accounts for the final
 * call which finds no further token.
 *
 * Declared static because it does not depend on instance state and
 * PHPUnit 10+ only accepts static data providers.
 *
 * @return array[] Datasets of the form { html: string, expected_token_counts: array<string, int> }.
 */
public static function data_html_processor_with_extended_next_token() {
	return array(
		'single_instance_per_tag'   => array(
			'html'                  => '
				<html>
					<head>
						<meta charset="utf-8">
						<title>Hello World</title>
					</head>
					<body>
						<h1>Hello World!</h1>
						<img src="example.png">
						<p>Each tag should occur only once in this document.<!--Closing P tag omitted intentionally.-->
						<footer>The end.</footer>
					</body>
				</html>
			',
			'expected_token_counts' => array(
				'+HTML'    => 1,
				'+HEAD'    => 1,
				'#text'    => 14,
				'+META'    => 1,
				'+TITLE'   => 1,
				'-HEAD'    => 1,
				'+BODY'    => 1,
				'+H1'      => 1,
				'-H1'      => 1,
				'+IMG'     => 1,
				'+P'       => 1,
				'#comment' => 1,
				'-P'       => 1,
				'+FOOTER'  => 1,
				'-FOOTER'  => 1,
				'-BODY'    => 1,
				'-HTML'    => 1,
				''         => 1,
			),
		),

		'multiple_tag_instances'    => array(
			'html'                  => '
				<html>
					<body>
						<h1>Hello World!</h1>
						<p>First
						<p>Second
						<p>Third
						<ul>
							<li>1
							<li>2
							<li>3
						</ul>
					</body>
				</html>
			',
			'expected_token_counts' => array(
				'+HTML' => 1,
				'+HEAD' => 1,
				'-HEAD' => 1,
				'+BODY' => 1,
				'#text' => 13,
				'+H1'   => 1,
				'-H1'   => 1,
				'+P'    => 3,
				'-P'    => 3,
				'+UL'   => 1,
				'+LI'   => 3,
				'-LI'   => 3,
				'-UL'   => 1,
				'-BODY' => 1,
				'-HTML' => 1,
				''      => 1,
			),
		),

		'extreme_nested_formatting' => array(
			'html'                  => '
				<html>
					<body>
						<p>
							<strong><em><strike><i><b><u>FORMAT</u></b></i></strike></em></strong>
						</p>
					</body>
				</html>
			',
			'expected_token_counts' => array(
				'+HTML'   => 1,
				'+HEAD'   => 1,
				'-HEAD'   => 1,
				'+BODY'   => 1,
				'#text'   => 7,
				'+P'      => 1,
				'+STRONG' => 1,
				'+EM'     => 1,
				'+STRIKE' => 1,
				'+I'      => 1,
				'+B'      => 1,
				'+U'      => 1,
				'-U'      => 1,
				'-B'      => 1,
				'-I'      => 1,
				'-STRIKE' => 1,
				'-EM'     => 1,
				'-STRONG' => 1,
				'-P'      => 1,
				'-BODY'   => 1,
				'-HTML'   => 1,
				''        => 1,
			),
		),
	);
}
|
||||
|
||||
/**
 * Ensures that subclasses to WP_HTML_Processor can do bookkeeping by extending the next_token() method.
 *
 * A token-counting subclass scans the whole document and the per-token
 * counts it collected are compared against the expected snapshot.
 *
 * @ticket 62269
 * @dataProvider data_html_processor_with_extended_next_token
 *
 * @param string             $html                  HTML document to scan.
 * @param array<string, int> $expected_token_counts Expected number of times each token is counted.
 */
public function test_ensure_next_token_method_extensibility( $html, $expected_token_counts ) {
	require_once DIR_TESTDATA . '/html-api/token-counting-html-processor.php';

	$counter = Token_Counting_HTML_Processor::create_full_parser( $html );

	// Exhaust the document; the counting happens as a side effect inside the subclass.
	do {
		$found_tag = $counter->next_tag();
	} while ( $found_tag );

	$actual_token_counts = $counter->token_seen_count;
	$failure_message     = 'Snapshot: ' . var_export( $actual_token_counts, true );

	$this->assertEquals( $expected_token_counts, $actual_token_counts, $failure_message );
}
|
||||
|
||||
/**
|
||||
* Ensure that lowercased tag_name query matches tags case-insensitively.
|
||||
*
|
||||
|
Loading…
x
Reference in New Issue
Block a user