From 967f40fc11e76ef7282f9914221fcf88a7be634c Mon Sep 17 00:00:00 2001
From: "Edward Z. Yang" <edwardzyang@thewritingpot.com>
Date: Sat, 9 Sep 2006 21:10:04 +0000
Subject: [PATCH] Make install docs more comprehensive about encoding. Prompted
 by http://hp.jpsband.org/vanilla/comments.php?DiscussionID=2

git-svn-id: http://htmlpurifier.org/svnroot/htmlpurifier/trunk@392 48356398-32a2-884e-a903-53898d9a118a
---
 INSTALL | 45 +++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 43 insertions(+), 2 deletions(-)

diff --git a/INSTALL b/INSTALL
index f225446b..ad74f405 100644
--- a/INSTALL
+++ b/INSTALL
@@ -35,7 +35,8 @@ but operating system is quite irrelevant in this particular case.
 The library/ directory must be added to your path: HTML Purifier will not be
 able to find the necessary includes otherwise.  This is as simple as:
 
-    set_include_path('/path/to/htmlpurifier/library' . PATH_SEPARATOR . get_include_path());
+    set_include_path('/path/to/htmlpurifier/library' . PATH_SEPARATOR .
+        get_include_path() );
 
 ...replacing /path/to/htmlpurifier with the actual location of the folder. Don't
 worry, HTML Purifier is namespaced so unless you have another file named
@@ -58,6 +59,13 @@ is a (short) checklist:
  * Have I specified XHTML 1.0 Transitional as the doctype?
  * Have I specified UTF-8 as the character encoding?
 
+To find out what these are, browse to your website and view its source code.
+You can figure out the doctype from a declaration that looks like
+    <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+        "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+or there may be none.  You can figure out the character encoding by looking for
+    <meta http-equiv="Content-type" content="text/html;charset=ENCODING">
+
 I cannot stress the importance of these two bullets enough.  Omitting either
 of them could have dire consequences not only for security but for plain
 old usability.  You can find a more in-depth discussion of why this is needed
@@ -70,7 +78,12 @@ dependent on iconv, and you'll be missing characters if the charset you
 choose doesn't have them.
 
     $config = HTMLPurifier_Config::createDefault();
-    $config->set('Core', 'Encoding', $encoding);
+    $config->set('Core', 'Encoding', /* put your encoding here */);
+
+An example usage for Latin-1 websites:
+
+    $config = HTMLPurifier_Config::createDefault();
+    $config->set('Core', 'Encoding', 'ISO-8859-1');
 
 
 
@@ -88,3 +101,31 @@ Or, if you're using the configuration object:
 
 That's it.  For more examples, check out docs/examples/.  Also, SLOW gives
 advice on what to do if HTML Purifier is slowing down your application.
+
+
+
+4.   Quick install
+
+If your website is in UTF-8, use this code:
+
+<?php
+    set_include_path('/path/to/htmlpurifier/library'
+         . PATH_SEPARATOR . get_include_path() );
+    require_once 'HTMLPurifier.php';
+    $purifier = new HTMLPurifier();
+    
+    $clean_html = $purifier->purify($dirty_html);
+
+If your website is in a different encoding, use this code:
+
+<?php
+    set_include_path('/path/to/htmlpurifier/library'
+         . PATH_SEPARATOR . get_include_path() );
+    require_once 'HTMLPurifier.php';
+    
+    $config = HTMLPurifier_Config::createDefault();
+    $config->set('Core', 'Encoding', 'ISO-8859-1'); //replace with your encoding
+    $purifier = new HTMLPurifier($config);
+    
+    $clean_html = $purifier->purify($dirty_html);
+?>
\ No newline at end of file