mirror of
				https://github.com/ezyang/htmlpurifier.git
				synced 2025-10-24 18:16:19 +02:00 
			
		
		
		
	git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@1343 48356398-32a2-884e-a903-53898d9a118a
		
			
				
	
	
		
			72 lines
		
	
	
		
			2.0 KiB
		
	
	
	
		
			PHP
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			72 lines
		
	
	
		
			2.0 KiB
		
	
	
	
		
			PHP
		
	
	
		
			Executable File
		
	
	
	
	
| #!/usr/bin/php
 | |
| <?php
 | |
| 
 | |
| require_once 'common.php';
 | |
| assertCli();
 | |
| 
 | |
| /**
 | |
|  * Parses *.ent files into an entity lookup table, and then serializes and
 | |
|  * writes the whole kaboodle to a file. The resulting file should be versioned.
 | |
|  */
 | |
| 
 | |
| chdir( dirname(__FILE__) );
 | |
| 
 | |
| // here's where the entity files are located, assuming working directory
 | |
| // is the same as the location of this PHP file. Needs trailing slash.
 | |
| $entity_dir = '../docs/entities/';
 | |
| 
 | |
| // defines the output file for the serialized content.
 | |
| $output_file = '../library/HTMLPurifier/EntityLookup/entities.ser';
 | |
| 
 | |
| // courtesy of a PHP manual comment
 | |
| function unichr($dec) {
 | |
|     if ($dec < 128) {
 | |
|         $utf  = chr($dec);
 | |
|     } else if ($dec < 2048) {
 | |
|         $utf  = chr(192 + (($dec - ($dec % 64)) / 64));
 | |
|         $utf .= chr(128 + ($dec % 64));
 | |
|     } else {
 | |
|         $utf  = chr(224 + (($dec - ($dec % 4096)) / 4096));
 | |
|         $utf .= chr(128 + ((($dec % 4096) - ($dec % 64)) / 64));
 | |
|         $utf .= chr(128 + ($dec % 64));
 | |
|     }
 | |
|     return $utf;
 | |
| }
 | |
| 
 | |
| if ( !is_dir($entity_dir) ) exit("Fatal Error: Can't find entity directory.\n");
 | |
| if ( file_exists($output_file) ) exit("Fatal Error: entity-lookup.txt already exists.\n");
 | |
| 
 | |
| $dh = @opendir($entity_dir);
 | |
| if ( !$dh ) exit("Fatal Error: Cannot read entity directory.\n");
 | |
| 
 | |
| $entity_files = array();
 | |
| while (($file = readdir($dh)) !== false) {
 | |
|     if (@$file[0] === '.') continue;
 | |
|     if (substr(strrchr($file, "."), 1) !== 'ent') continue;
 | |
|     $entity_files[] = $file;
 | |
| }
 | |
| closedir($dh);
 | |
| 
 | |
| if ( !$entity_files ) exit("Fatal Error: No entity files to parse.\n");
 | |
| 
 | |
| $entity_table = array();
 | |
| $regexp = '/<!ENTITY\s+([A-Za-z]+)\s+"&#(?:38;#)?([0-9]+);">/';
 | |
| 
 | |
| foreach ( $entity_files as $file ) {
 | |
|     $contents = file_get_contents($entity_dir . $file);
 | |
|     $matches = array();
 | |
|     preg_match_all($regexp, $contents, $matches, PREG_SET_ORDER);
 | |
|     foreach ($matches as $match) {
 | |
|         $entity_table[$match[1]] = unichr($match[2]);
 | |
|     }
 | |
| }
 | |
| 
 | |
| $output = serialize($entity_table);
 | |
| 
 | |
| $fh = fopen($output_file, 'w');
 | |
| fwrite($fh, $output);
 | |
| fclose($fh);
 | |
| 
 | |
| echo "Completed successfully.";
 | |
| 
 |