From 9191877740ea6d1586d2c66b03092df778a737a0 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Sat, 23 Jun 2007 18:50:41 +0000 Subject: [PATCH] Factor out auto-paragraph to injector class. git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1210 48356398-32a2-884e-a903-53898d9a118a --- library/HTMLPurifier/Injector.php | 23 +++ .../HTMLPurifier/Injector/AutoParagraph.php | 132 +++++++++++++++++ .../HTMLPurifier/Strategy/MakeWellFormed.php | 138 +----------------- 3 files changed, 161 insertions(+), 132 deletions(-) create mode 100644 library/HTMLPurifier/Injector.php create mode 100644 library/HTMLPurifier/Injector/AutoParagraph.php diff --git a/library/HTMLPurifier/Injector.php b/library/HTMLPurifier/Injector.php new file mode 100644 index 00000000..f8af26dc --- /dev/null +++ b/library/HTMLPurifier/Injector.php @@ -0,0 +1,23 @@ + \ No newline at end of file diff --git a/library/HTMLPurifier/Injector/AutoParagraph.php b/library/HTMLPurifier/Injector/AutoParagraph.php new file mode 100644 index 00000000..4e5357ba --- /dev/null +++ b/library/HTMLPurifier/Injector/AutoParagraph.php @@ -0,0 +1,132 @@ +get('CurrentNesting'); + // paragraphing is on + if (empty($current_nesting)) { + // we're in root node, great time to start a paragraph + // since we're also dealing with a text node + $result =& $context->get('OutputTokens'); + $result[] = new HTMLPurifier_Token_Start('p'); + $current_nesting[] = new HTMLPurifier_Token_Start('p'); + $this->_splitText($token, $config, $context); + } else { + // we're not in root node, so let's see whether or not + // we're in a paragraph + + // losslessly access the parent element + $parent = array_pop($current_nesting); + $current_nesting[] = $parent; + + if ($parent->name === 'p') { + $this->_splitText($token, $config, $context); + } + } + } + + /** + * Sub-function for auto-paragraphing that takes a token and splits it + * up into paragraphs unconditionally. Requires that a paragraph was + * already started + */ + function _splitText(&$token, $config, &$context) { + $dnl = PHP_EOL . PHP_EOL; // double-newline + $definition = $config->getHTMLDefinition(); + $current_nesting =& $context->get('CurrentNesting'); + + $raw_paragraphs = explode($dnl, $token->data); + + $token = false; // token has been completely dismantled + + // remove empty paragraphs + $paragraphs = array(); + foreach ($raw_paragraphs as $par) { + if (trim($par) !== '') $paragraphs[] = $par; + } + + $result =& $context->get('OutputTokens'); + + if (empty($paragraphs) && count($raw_paragraphs) > 1) { + $result[] = new HTMLPurifier_Token_End('p'); + array_pop($current_nesting); + return; + } + + foreach ($paragraphs as $data) { + $result[] = new HTMLPurifier_Token_Text($data); + $result[] = new HTMLPurifier_Token_End('p'); + $result[] = new HTMLPurifier_Token_Start('p'); + } + array_pop($result); // remove trailing start token + + // check the outside to determine whether or not end + // paragraph tag is needed (it's already there) + $end_paragraph = $this->_needsEndTag( + $context->get('InputTokens'), + $context->get('InputIndex'), + $definition + ); + + if ($end_paragraph) { + // things are good as they stand, remove top-level parent + // that we deferred + array_pop($current_nesting); + } else { + // remove the ending tag, no nesting modifications necessary + array_pop($result); + } + + } + + /** + * Determines if up-coming code requires an end-paragraph tag, + * otherwise, keep the paragraph open (don't make another one) + * @protected + */ + function _needsEndTag($tokens, $k, $definition) { + $end_paragraph = false; + for ($j = $k + 1; isset($tokens[$j]); $j++) { + if ($tokens[$j]->type == 'start' || $tokens[$j]->type == 'empty') { + if ($tokens[$j]->name == 'p') { + $end_paragraph = true; + } else { + $end_paragraph = isset($definition->info['p']->auto_close[$tokens[$j]->name]); + } + break; + } elseif ($tokens[$j]->type == 'text') { + if (!$tokens[$j]->is_whitespace) { + $end_paragraph = false; + break; + } + } elseif ($tokens[$j]->type == 'end') { + // nonsensical case + $end_paragraph = false; + break; + } + } + return $end_paragraph; + } + + function handleStart(&$token, $config, &$context) { + $current_nesting =& $context->get('CurrentNesting'); + if (!empty($current_nesting)) return; + $definition = $config->getHTMLDefinition(); + // to be replaced with new auto-auto_close algorithm + if (isset($definition->info['p']->auto_close[$token->name])) return; + $result =& $context->get('OutputTokens'); + $result[] = $current_nesting[] = new HTMLPurifier_Token_Start('p'); + } + +} + +?> \ No newline at end of file diff --git a/library/HTMLPurifier/Strategy/MakeWellFormed.php b/library/HTMLPurifier/Strategy/MakeWellFormed.php index 9e775f94..677d09af 100644 --- a/library/HTMLPurifier/Strategy/MakeWellFormed.php +++ b/library/HTMLPurifier/Strategy/MakeWellFormed.php @@ -4,6 +4,8 @@ require_once 'HTMLPurifier/Strategy.php'; require_once 'HTMLPurifier/HTMLDefinition.php'; require_once 'HTMLPurifier/Generator.php'; +require_once 'HTMLPurifier/Injector/AutoParagraph.php'; + HTMLPurifier_ConfigSchema::define( 'Core', 'AutoParagraph', false, 'bool', '

@@ -42,6 +44,8 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy $auto_paragraph_disabled = false; $context->register('AutoParagraphSkip', $auto_paragraph_skip); + $injector = new HTMLPurifier_Injector_AutoParagraph(); + for ($tokens_index = 0; isset($tokens[$tokens_index]); $tokens_index++) { // if all goes well, this token will be passed through unharmed @@ -60,7 +64,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy if ($token->type === 'text') { if ($auto_paragraph && !$auto_paragraph_disabled) { - $this->autoParagraphText($token, $config, $context); + $injector->handleText($token, $config, $context); } } @@ -113,7 +117,7 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy } if ($auto_paragraph && !$auto_paragraph_disabled) { - $this->autoParagraphStart($token, $config, $context); + $injector->handleStart($token, $config, $context); } $this->processToken($token, $config, $context); @@ -227,136 +231,6 @@ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy } } - /** - * Sub-function call for auto-paragraphing for any old text node. - * This will eventually - * be factored out into a generic Formatter class - * @note This function does not care at all about ending paragraph - * tags: the rest of MakeWellFormed handles that! - */ - function autoParagraphText(&$token, $config, &$context) { - $dnl = PHP_EOL . PHP_EOL; // double-newline - $current_nesting =& $context->get('CurrentNesting'); - // paragraphing is on - if (empty($current_nesting)) { - // we're in root node, great time to start a paragraph - // since we're also dealing with a text node - $result =& $context->get('OutputTokens'); - $result[] = new HTMLPurifier_Token_Start('p'); - $current_nesting[] = new HTMLPurifier_Token_Start('p'); - $this->autoParagraphSplitText($token, $config, $context); - } else { - // we're not in root node, so let's see whether or not - // we're in a paragraph - - // losslessly access the parent element - $parent = array_pop($current_nesting); - $current_nesting[] = $parent; - - if ($parent->name === 'p') { - $this->autoParagraphSplitText($token, $config, $context); - } - } - } - - /** - * Sub-function for auto-paragraphing that takes a token and splits it - * up into paragraphs unconditionally. Requires that a paragraph was - * already started - */ - function autoParagraphSplitText(&$token, $config, &$context) { - $dnl = PHP_EOL . PHP_EOL; // double-newline - $definition = $config->getHTMLDefinition(); - $current_nesting =& $context->get('CurrentNesting'); - - $raw_paragraphs = explode($dnl, $token->data); - - $token = false; // token has been completely dismantled - - // remove empty paragraphs - $paragraphs = array(); - foreach ($raw_paragraphs as $par) { - if (trim($par) !== '') $paragraphs[] = $par; - } - - $result =& $context->get('OutputTokens'); - - if (empty($paragraphs) && count($raw_paragraphs) > 1) { - $this->processToken(new HTMLPurifier_Token_End('p'), $config, $context); - return; - } - - // this could be rewritten to use processToken, but it would - // be slightly less efficient - - foreach ($paragraphs as $data) { - $result[] = new HTMLPurifier_Token_Text($data); - $result[] = new HTMLPurifier_Token_End('p'); - $result[] = new HTMLPurifier_Token_Start('p'); - } - array_pop($result); // remove trailing start token - - // check the outside to determine whether or not end - // paragraph tag is needed (it's already there) - $end_paragraph = $this->autoParagraphEndParagraph( - $context->get('InputTokens'), - $context->get('InputIndex'), - $definition - ); - - if ($end_paragraph) { - // things are good as they stand, remove top-level parent - // that we deferred - array_pop($current_nesting); - } else { - // remove the ending tag, no nesting modifications necessary - array_pop($result); - } - - } - - /** - * Determines if up-coming code requires an end-paragraph tag, - * otherwise, keep the paragraph open (don't make another one) - * @protected - */ - function autoParagraphEndParagraph($tokens, $k, $definition) { - $end_paragraph = false; - for ($j = $k + 1; isset($tokens[$j]); $j++) { - if ($tokens[$j]->type == 'start' || $tokens[$j]->type == 'empty') { - if ($tokens[$j]->name == 'p') { - $end_paragraph = true; - } else { - $end_paragraph = isset($definition->info['p']->auto_close[$tokens[$j]->name]); - } - break; - } elseif ($tokens[$j]->type == 'text') { - if (!$tokens[$j]->is_whitespace) { - $end_paragraph = false; - break; - } - } elseif ($tokens[$j]->type == 'end') { - // nonsensical case - $end_paragraph = false; - break; - } - } - return $end_paragraph; - } - - /** - * Sub-function for auto-paragraphing that processes element starts - */ - function autoParagraphStart(&$token, $config, &$context) { - $current_nesting =& $context->get('CurrentNesting'); - if (!empty($current_nesting)) return; - $definition = $config->getHTMLDefinition(); - // to be replaced with new auto-auto_close algorithm - if (isset($definition->info['p']->auto_close[$token->name])) return; - $result =& $context->get('OutputTokens'); - $this->processToken(new HTMLPurifier_Token_Start('p'), $config, $context); - } - } ?> \ No newline at end of file