diff --git a/library/HTMLPurifier.includes.php b/library/HTMLPurifier.includes.php
index 2de7f190..260d82d8 100644
--- a/library/HTMLPurifier.includes.php
+++ b/library/HTMLPurifier.includes.php
@@ -201,6 +201,7 @@ require 'HTMLPurifier/URIFilter/HostBlacklist.php';
require 'HTMLPurifier/URIFilter/MakeAbsolute.php';
require 'HTMLPurifier/URIFilter/Munge.php';
require 'HTMLPurifier/URIScheme/data.php';
+require 'HTMLPurifier/URIScheme/file.php';
require 'HTMLPurifier/URIScheme/ftp.php';
require 'HTMLPurifier/URIScheme/http.php';
require 'HTMLPurifier/URIScheme/https.php';
diff --git a/library/HTMLPurifier.safe-includes.php b/library/HTMLPurifier.safe-includes.php
index 630daaa1..ec68b498 100644
--- a/library/HTMLPurifier.safe-includes.php
+++ b/library/HTMLPurifier.safe-includes.php
@@ -195,6 +195,7 @@ require_once $__dir . '/HTMLPurifier/URIFilter/HostBlacklist.php';
require_once $__dir . '/HTMLPurifier/URIFilter/MakeAbsolute.php';
require_once $__dir . '/HTMLPurifier/URIFilter/Munge.php';
require_once $__dir . '/HTMLPurifier/URIScheme/data.php';
+require_once $__dir . '/HTMLPurifier/URIScheme/file.php';
require_once $__dir . '/HTMLPurifier/URIScheme/ftp.php';
require_once $__dir . '/HTMLPurifier/URIScheme/http.php';
require_once $__dir . '/HTMLPurifier/URIScheme/https.php';
diff --git a/library/HTMLPurifier/ConfigSchema/schema.ser b/library/HTMLPurifier/ConfigSchema/schema.ser
index 2d7cfa1a..528bd0de 100644
Binary files a/library/HTMLPurifier/ConfigSchema/schema.ser and b/library/HTMLPurifier/ConfigSchema/schema.ser differ
diff --git a/library/HTMLPurifier/ConfigSchema/schema/HTML.NewlineNormalization.txt b/library/HTMLPurifier/ConfigSchema/schema/HTML.NewlineNormalization.txt
new file mode 100644
index 00000000..948aea89
--- /dev/null
+++ b/library/HTMLPurifier/ConfigSchema/schema/HTML.NewlineNormalization.txt
@@ -0,0 +1,9 @@
+HTML.NewlineNormalization
+TYPE: bool
+VERSION: 4.2.0
+DEFAULT: true
+--DESCRIPTION--
+
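+ Whether or not to normalize newlines in the input: when true, CRLF and lone CR line endings are converted to LF; when false, the input's line endings are left untouched.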
+
+--# vim: et sw=4 sts=4
diff --git a/library/HTMLPurifier/Lexer.php b/library/HTMLPurifier/Lexer.php
index f853421a..db14ebac 100644
--- a/library/HTMLPurifier/Lexer.php
+++ b/library/HTMLPurifier/Lexer.php
@@ -263,8 +263,10 @@ class HTMLPurifier_Lexer
public function normalize($html, $config, $context) {
// normalize newlines to \n
- $html = str_replace("\r\n", "\n", $html);
- $html = str_replace("\r", "\n", $html);
+ if ($config->get('HTML.NewlineNormalization')) {
+ $html = str_replace("\r\n", "\n", $html);
+ $html = str_replace("\r", "\n", $html);
+ }
if ($config->get('HTML.Trusted')) {
// escape convoluted CDATA
diff --git a/library/HTMLPurifier/Lexer/PH5P.php b/library/HTMLPurifier/Lexer/PH5P.php
index fa1bf973..faf00b82 100644
--- a/library/HTMLPurifier/Lexer/PH5P.php
+++ b/library/HTMLPurifier/Lexer/PH5P.php
@@ -125,8 +125,6 @@ class HTML5 {
const EOF = 5;
public function __construct($data) {
- $data = str_replace("\r\n", "\n", $data);
- $data = str_replace("\r", null, $data);
$this->data = $data;
$this->char = -1;
diff --git a/tests/HTMLPurifier/LexerTest.php b/tests/HTMLPurifier/LexerTest.php
index 0cb95155..79d1cf87 100644
--- a/tests/HTMLPurifier/LexerTest.php
+++ b/tests/HTMLPurifier/LexerTest.php
@@ -725,6 +725,24 @@ div {}
);
}
+    function test_tokenizeHTML_removeNewline() {
+        // Normalization enabled: the CRLF in the input must come back as a bare LF.
+        $this->config->set('HTML.NewlineNormalization', true);
+        $input = "plain text\r\n";
+        $expect = array(new HTMLPurifier_Token_Text("plain text\n"));
+        $this->assertTokenization($input, $expect); // was missing: the test asserted nothing
+    }
+
+    function test_tokenizeHTML_noRemoveNewline() {
+        // Normalization disabled: the CRLF in the input must survive tokenization as-is.
+        $this->config->set('HTML.NewlineNormalization', false);
+        $this->assertTokenization(
+            "plain text\r\n",
+            array(new HTMLPurifier_Token_Text("plain text\r\n"))
+        );
+    }
+
+
/*
function test_tokenizeHTML_() {