mirror of
https://github.com/ezyang/htmlpurifier.git
synced 2025-08-05 13:47:24 +02:00
Track column numbers in addition to line numbers.
Signed-off-by: Edward Z. Yang <edwardzyang@thewritingpot.com>
This commit is contained in:
@@ -42,6 +42,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||
$inside_tag = false; // whether or not we're parsing the inside of a tag
|
||||
$array = array(); // result array
|
||||
|
||||
// This is also treated to mean maintain *column* numbers too
|
||||
$maintain_line_numbers = $config->get('Core', 'MaintainLineNumbers');
|
||||
|
||||
if ($maintain_line_numbers === null) {
|
||||
@@ -50,8 +51,15 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||
$maintain_line_numbers = $config->get('Core', 'CollectErrors');
|
||||
}
|
||||
|
||||
if ($maintain_line_numbers) $current_line = 1;
|
||||
else $current_line = false;
|
||||
if ($maintain_line_numbers) {
|
||||
$current_line = 1;
|
||||
$current_col = 0;
|
||||
$length = strlen($html);
|
||||
} else {
|
||||
$current_line = false;
|
||||
$current_col = false;
|
||||
$length = false;
|
||||
}
|
||||
$context->register('CurrentLine', $current_line);
|
||||
$nl = "\n";
|
||||
// how often to manually recalculate. This will ALWAYS be right,
|
||||
@@ -68,14 +76,31 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||
|
||||
while(++$loops) {
|
||||
|
||||
// recalculate lines
|
||||
if (
|
||||
$maintain_line_numbers && // line number tracking is on
|
||||
$synchronize_interval && // synchronization is on
|
||||
$cursor > 0 && // cursor is further than zero
|
||||
$loops % $synchronize_interval === 0 // time to synchronize!
|
||||
) {
|
||||
$current_line = 1 + $this->substrCount($html, $nl, 0, $cursor);
|
||||
// $cursor is either at the start of a token, or inside of
|
||||
// a tag (i.e. there was a < immediately before it), as indicated
|
||||
// by $inside_tag
|
||||
|
||||
if ($maintain_line_numbers) {
|
||||
|
||||
// $rcursor, however, is always at the start of a token.
|
||||
$rcursor = $cursor - (int) $inside_tag;
|
||||
|
||||
// Column number is cheap, so we calculate it every round.
|
||||
// We're interested at the *end* of the newline string, so
|
||||
// we need to add strlen($nl) == 1 to $nl_pos before subtracting it
|
||||
// from our "rcursor" position.
|
||||
$nl_pos = strrpos($html, $nl, $rcursor - $length);
|
||||
$current_col = $rcursor - (is_bool($nl_pos) ? 0 : $nl_pos + 1);
|
||||
|
||||
// recalculate lines
|
||||
if (
|
||||
$synchronize_interval && // synchronization is on
|
||||
$cursor > 0 && // cursor is further than zero
|
||||
$loops % $synchronize_interval === 0 // time to synchronize!
|
||||
) {
|
||||
$current_line = 1 + $this->substrCount($html, $nl, 0, $cursor);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
$position_next_lt = strpos($html, '<', $cursor);
|
||||
@@ -99,7 +124,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||
)
|
||||
);
|
||||
if ($maintain_line_numbers) {
|
||||
$token->line = $current_line;
|
||||
$token->rawPosition($current_line, $current_col);
|
||||
$current_line += $this->substrCount($html, $nl, $cursor, $position_next_lt - $cursor);
|
||||
}
|
||||
$array[] = $token;
|
||||
@@ -119,7 +144,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||
)
|
||||
)
|
||||
);
|
||||
if ($maintain_line_numbers) $token->line = $current_line;
|
||||
if ($maintain_line_numbers) $token->rawPosition($current_line, $current_col);
|
||||
$array[] = $token;
|
||||
break;
|
||||
} elseif ($inside_tag && $position_next_gt !== false) {
|
||||
@@ -167,7 +192,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||
)
|
||||
);
|
||||
if ($maintain_line_numbers) {
|
||||
$token->line = $current_line;
|
||||
$token->rawPosition($current_line, $current_col);
|
||||
$current_line += $this->substrCount($html, $nl, $cursor, $strlen_segment);
|
||||
}
|
||||
$array[] = $token;
|
||||
@@ -182,7 +207,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||
$type = substr($segment, 1);
|
||||
$token = new HTMLPurifier_Token_End($type);
|
||||
if ($maintain_line_numbers) {
|
||||
$token->line = $current_line;
|
||||
$token->rawPosition($current_line, $current_col);
|
||||
$current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
|
||||
}
|
||||
$array[] = $token;
|
||||
@@ -199,7 +224,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||
if ($e) $e->send(E_NOTICE, 'Lexer: Unescaped lt');
|
||||
$token = new HTMLPurifier_Token_Text('<');
|
||||
if ($maintain_line_numbers) {
|
||||
$token->line = $current_line;
|
||||
$token->rawPosition($current_line, $current_col);
|
||||
$current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
|
||||
}
|
||||
$array[] = $token;
|
||||
@@ -227,7 +252,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||
$token = new HTMLPurifier_Token_Start($segment);
|
||||
}
|
||||
if ($maintain_line_numbers) {
|
||||
$token->line = $current_line;
|
||||
$token->rawPosition($current_line, $current_col);
|
||||
$current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
|
||||
}
|
||||
$array[] = $token;
|
||||
@@ -259,7 +284,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||
$token = new HTMLPurifier_Token_Start($type, $attr);
|
||||
}
|
||||
if ($maintain_line_numbers) {
|
||||
$token->line = $current_line;
|
||||
$token->rawPosition($current_line, $current_col);
|
||||
$current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor);
|
||||
}
|
||||
$array[] = $token;
|
||||
@@ -276,7 +301,7 @@ class HTMLPurifier_Lexer_DirectLex extends HTMLPurifier_Lexer
|
||||
substr($html, $cursor)
|
||||
)
|
||||
);
|
||||
if ($maintain_line_numbers) $token->line = $current_line;
|
||||
if ($maintain_line_numbers) $token->rawPosition($current_line, $current_col);
|
||||
// no cursor scroll? Hmm...
|
||||
$array[] = $token;
|
||||
break;
|
||||
|
Reference in New Issue
Block a user