mirror of
				https://github.com/ezyang/htmlpurifier.git
				synced 2025-10-25 02:26:32 +02:00 
			
		
		
		
	git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/branches/php4@1713 48356398-32a2-884e-a903-53898d9a118a
		
			
				
	
	
		
			317 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
			
		
		
	
	
			317 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
| <?php
 | |
| 
 | |
| require_once 'HTMLPurifier/Strategy.php';
 | |
| require_once 'HTMLPurifier/HTMLDefinition.php';
 | |
| require_once 'HTMLPurifier/Generator.php';
 | |
| 
 | |
| require_once 'HTMLPurifier/Injector/AutoParagraph.php';
 | |
| require_once 'HTMLPurifier/Injector/Linkify.php';
 | |
| require_once 'HTMLPurifier/Injector/PurifierLinkify.php';
 | |
| 
 | |
// Registers the AutoFormat.Custom directive: a list (empty by default) whose
// entries are read by HTMLPurifier_Strategy_MakeWellFormed::execute() and
// turned into additional injectors. String entries are treated as class names
// minus the "HTMLPurifier_Injector_" prefix; anything else is used as-is.
HTMLPurifier_ConfigSchema::define(
    'AutoFormat', 'Custom', array(), 'list', '
<p>
  This directive can be used to add custom auto-format injectors.
  Specify an array of injector names (class name minus the prefix)
  or concrete implementations. Injector class must exist. This directive
  has been available since 2.0.1.
</p>
'
);
 | |
| 
 | |
/**
 * Takes tokens and makes them well-formed (balances end tags, etc.)
 *
 * While walking the token list the strategy also drives the configured
 * injectors (the AutoFormat directives), which may replace the current
 * token with an array of tokens that is then spliced back into the input
 * and re-processed.
 */
class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy
{
    
    /**
     * Locally shared variable references. They are only meaningful while
     * execute() is running: execute() binds them at the start and unsets
     * them again before returning.
     *
     *  - $inputTokens:     reference to the token array being processed
     *  - $inputIndex:      index of the token currently being processed
     *  - $outputTokens:    reference to the result array being built
     *  - $currentNesting:  stack of start tokens that are still open
     *  - $currentInjector: index of the injector that last substituted an
     *                      array for the current token, or false
     *  - $injectors:       zero-indexed list of active injector objects
     * @private
     */
    var $inputTokens, $inputIndex, $outputTokens, $currentNesting,
        $currentInjector, $injectors;
    
    /**
     * Balances the tags of a token list and runs the injectors over it.
     *
     * @param $tokens  Array of token objects to process.
     * @param $config  Configuration object; read for the HTML definition,
     *                 Core.EscapeInvalidTags and the AutoFormat directives.
     * @param $context Context object (by reference). CurrentNesting,
     *                 InputIndex, InputTokens and CurrentToken are
     *                 registered on it for the duration of the main loop.
     * @return Array of tokens in which every start tag has a matching
     *         end tag.
     */
    function execute($tokens, $config, &$context) {
        
        $definition = $config->getHTMLDefinition();
        
        // local variables
        $result = array();
        $generator = new HTMLPurifier_Generator();
        $escape_invalid_tags = $config->get('Core', 'EscapeInvalidTags');
        $e =& $context->get('ErrorCollector', true);
        
        // member variables
        $this->currentNesting = array();
        $this->inputIndex     = false;
        $this->inputTokens    =& $tokens;
        $this->outputTokens   =& $result;
        
        // context variables
        $context->register('CurrentNesting', $this->currentNesting);
        $context->register('InputIndex', $this->inputIndex);
        $context->register('InputTokens', $tokens);
        
        // -- begin INJECTOR --
        
        $this->injectors = array();
        
        $injectors = $config->getBatch('AutoFormat');
        $custom_injectors = $injectors['Custom'];
        unset($injectors['Custom']); // special case
        foreach ($injectors as $injector => $b) {
            // every remaining directive is a boolean switch named after
            // the injector class it enables
            if (!$b) continue;
            $injector = "HTMLPurifier_Injector_$injector";
            $this->injectors[] = new $injector;
        }
        foreach ($custom_injectors as $injector) {
            // strings are class names minus the prefix; anything else is
            // taken to be a ready-made injector instance
            if (is_string($injector)) {
                $injector = "HTMLPurifier_Injector_$injector";
                $injector = new $injector;
            }
            $this->injectors[] = $injector;
        }
        
        // array index of the injector that resulted in an array
        // substitution. This enables processTokens() to know which
        // injectors are affected by the added tokens and which are
        // not (namely, the ones after the current injector are not
        // affected)
        $this->currentInjector = false;
        
        // give the injectors references to the definition and context
        // variables for performance reasons
        //
        // prepare() returns a non-false value (interpolated into the
        // warning below) when the injector cannot be enabled. Rejected
        // injectors are only recorded inside the loop and removed once it
        // has finished: removing them while iterating would shift the
        // remaining entries under keys the loop still intends to visit,
        // skipping prepare() for one injector and dereferencing a missing
        // entry at the end.
        $rejected = array();
        foreach ($this->injectors as $i => $x) {
            $error = $this->injectors[$i]->prepare($config, $context);
            if (!$error) continue;
            $name = $this->injectors[$i]->name;
            trigger_error("Cannot enable $name injector because $error is not allowed", E_USER_WARNING);
            $rejected[] = $i;
        }
        if ($rejected) {
            foreach ($rejected as $i) unset($this->injectors[$i]);
            // processToken() walks the injectors by numeric index starting
            // at 0, so the list must stay contiguous
            $this->injectors = array_values($this->injectors);
        }
        
        // warning: most foreach loops follow the convention $i => $x.
        // be sure, for PHP4 compatibility, to only perform write operations
        // directly referencing the object using $i: $x is only safe for reads
        
        // -- end INJECTOR --
        
        $token = false;
        $context->register('CurrentToken', $token);
        
        for ($this->inputIndex = 0; isset($tokens[$this->inputIndex]); $this->inputIndex++) {
            
            // if all goes well, this token will be passed through unharmed
            $token = $tokens[$this->inputIndex];
            
            //printTokens($tokens, $this->inputIndex);
            
            // every token consumed counts down each injector's skip counter
            foreach ($this->injectors as $i => $x) {
                if ($x->skip > 0) $this->injectors[$i]->skip--;
            }
            
            // quick-check: if it's not a tag, no need to process
            if (empty( $token->is_tag )) {
                if ($token->type === 'text') {
                     // injector handler code; duplicated for performance reasons
                     foreach ($this->injectors as $i => $x) {
                         if (!$x->skip) $this->injectors[$i]->handleText($token);
                         // an injector replaced the token with an array:
                         // remember which one and stop offering the token
                         // to later injectors
                         if (is_array($token)) {
                             $this->currentInjector = $i;
                             break;
                         }
                     }
                }
                $this->processToken($token, $config, $context);
                continue;
            }
            
            $info = $definition->info[$token->name]->child;
            
            // quick tag checks: anything that's *not* an end tag
            $ok = false;
            if ($info->type == 'empty' && $token->type == 'start') {
                // test if it claims to be a start tag but is empty
                $token = new HTMLPurifier_Token_Empty($token->name, $token->attr);
                $ok = true;
            } elseif ($info->type != 'empty' && $token->type == 'empty' ) {
                // claims to be empty but really is a start tag
                $token = array(
                    new HTMLPurifier_Token_Start($token->name, $token->attr),
                    new HTMLPurifier_Token_End($token->name)
                );
                $ok = true;
            } elseif ($token->type == 'empty') {
                // real empty token
                $ok = true;
            } elseif ($token->type == 'start') {
                // start tag
                
                // ...unless they also have to close their parent
                if (!empty($this->currentNesting)) {
                    
                    $parent = array_pop($this->currentNesting);
                    $parent_info = $definition->info[$parent->name];
                    
                    // this can be replaced with a more general algorithm:
                    // if the token is not allowed by the parent, auto-close
                    // the parent
                    if (!isset($parent_info->child->elements[$token->name])) {
                        if ($e) $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent);
                        // close the parent, then re-loop to reprocess token
                        $result[] = new HTMLPurifier_Token_End($parent->name);
                        $this->inputIndex--;
                        continue;
                    }
                    
                    $this->currentNesting[] = $parent; // undo the pop
                }
                $ok = true;
            }
            
            // injector handler code; duplicated for performance reasons
            if ($ok) {
                // NOTE(review): when an empty-syntax tag was expanded into a
                // start/end pair above, $token is already an array on entry
                // to this loop, so handleElement() of the first non-skipped
                // injector receives an array -- confirm this is intended.
                foreach ($this->injectors as $i => $x) {
                    if (!$x->skip) $this->injectors[$i]->handleElement($token);
                    if (is_array($token)) {
                        $this->currentInjector = $i;
                        break;
                    }
                }
                $this->processToken($token, $config, $context);
                continue;
            }
            
            // sanity check: we should be dealing with a closing tag
            if ($token->type != 'end') continue;
            
            // make sure that we have something open
            if (empty($this->currentNesting)) {
                if ($escape_invalid_tags) {
                    if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag to text');
                    $result[] = new HTMLPurifier_Token_Text(
                        $generator->generateFromToken($token, $config, $context)
                    );
                } elseif ($e) {
                    $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag removed');
                }
                continue;
            }
            
            // first, check for the simplest case: everything closes neatly
            $current_parent = array_pop($this->currentNesting);
            if ($current_parent->name == $token->name) {
                $result[] = $token;
                foreach ($this->injectors as $i => $x) {
                    $this->injectors[$i]->notifyEnd($token);
                }
                continue;
            }
            
            // okay, so we're trying to close the wrong tag
            
            // undo the previous pop
            $this->currentNesting[] = $current_parent;
            
            // scroll back the entire nest, trying to find our tag.
            // (feature could be to specify how far you'd like to go)
            $size = count($this->currentNesting);
            // -2 because -1 is the last element, but we already checked that
            $skipped_tags = false;
            for ($i = $size - 2; $i >= 0; $i--) {
                if ($this->currentNesting[$i]->name == $token->name) {
                    // current nesting is modified
                    $skipped_tags = array_splice($this->currentNesting, $i);
                    break;
                }
            }
            
            // we still didn't find the tag, so remove
            if ($skipped_tags === false) {
                if ($escape_invalid_tags) {
                    $result[] = new HTMLPurifier_Token_Text(
                        $generator->generateFromToken($token, $config, $context)
                    );
                    if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag to text');
                } elseif ($e) {
                    $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag removed');
                }
                continue;
            }
            
            // okay, we found it, close all the skipped tags
            // note that skipped tags contains the element we need closed
            for ($i = count($skipped_tags) - 1; $i >= 0; $i--) {
                // index 0 is the element the end tag actually names, so no
                // notice is sent for it
                if ($i && $e && !isset($skipped_tags[$i]->armor['MakeWellFormed_TagClosedError'])) {
                    $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$i]);
                }
                $result[] = $new_token = new HTMLPurifier_Token_End($skipped_tags[$i]->name);
                foreach ($this->injectors as $j => $x) { // $j, not $i!!!
                    $this->injectors[$j]->notifyEnd($new_token);
                }
            }
            
        }
        
        $context->destroy('CurrentNesting');
        $context->destroy('InputTokens');
        $context->destroy('InputIndex');
        $context->destroy('CurrentToken');
        
        // we're at the end now, fix all still unclosed tags (this is
        // duplicated from the end of the loop with some slight modifications)
        // not using $skipped_tags since it would invariably be all of them
        if (!empty($this->currentNesting)) {
            for ($i = count($this->currentNesting) - 1; $i >= 0; $i--) {
                if ($e && !isset($this->currentNesting[$i]->armor['MakeWellFormed_TagClosedError'])) {
                    $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $this->currentNesting[$i]);
                }
                $result[] = $new_token = new HTMLPurifier_Token_End($this->currentNesting[$i]->name);
                foreach ($this->injectors as $j => $x) { // $j, not $i!!!
                    $this->injectors[$j]->notifyEnd($new_token);
                }
            }
        }
        
        // drop the per-run state so nothing carries over to the next call
        unset($this->outputTokens, $this->injectors, $this->currentInjector,
          $this->currentNesting, $this->inputTokens, $this->inputIndex);
        
        return $result;
    }
    
    /**
     * Commits the outcome of handling the current input token.
     *
     * An array means the token was replaced by several tokens: they are
     * spliced into the input in place of the current token, the input
     * index is stepped back so the loop in execute() re-processes them,
     * and the skip counters of the injectors up to and including
     * $this->currentInjector are raised. Any other non-empty value is
     * appended to the output and, for start/end tokens, pushed onto or
     * popped off the nesting stack. Empty values are ignored.
     *
     * @param $token   Token object, array of token objects, or an empty
     *                 value.
     * @param $config  Configuration object (not used here).
     * @param $context Context object (not used here).
     */
    function processToken($token, $config, &$context) {
        if (is_array($token)) {
            // the original token was overloaded by an injector, time
            // to do some fancy acrobatics
            
            // $this->inputIndex is decremented so that the entire set gets
            // re-processed
            array_splice($this->inputTokens, $this->inputIndex--, 1, $token);
            
            // adjust the injector skips based on the array substitution
            if ($this->injectors) {
                $offset = count($token);
                for ($i = 0; $i <= $this->currentInjector; $i++) {
                    // because of the skip back, we need to add one more
                    // for uninitialized injectors. I'm not exactly
                    // sure why this is the case, but I think it has to
                    // do with the fact that we're decrementing skips
                    // before re-checking text
                    if (!$this->injectors[$i]->skip) $this->injectors[$i]->skip++;
                    $this->injectors[$i]->skip += $offset;
                }
            }
        } elseif ($token) {
            // regular case
            $this->outputTokens[] = $token;
            if ($token->type == 'start') {
                $this->currentNesting[] = $token;
            } elseif ($token->type == 'end') {
                array_pop($this->currentNesting); // not actually used
            }
        }
    }
    
}
 | |
| 
 |