diff --git a/wire/core/Sanitizer.php b/wire/core/Sanitizer.php
index 1d586f0c..e7ffeb5d 100644
--- a/wire/core/Sanitizer.php
+++ b/wire/core/Sanitizer.php
@@ -1093,18 +1093,23 @@ class Sanitizer extends Wire {
}
/**
- * Returns a value that may be used in an email header
+ * Returns a value that may be used in an email header
+ *
+ * This method is designed to prevent one email header from injecting into another.
*
* #pw-group-strings
*
* @param string $value
+ * @param bool $headerName Sanitize a header name rather than header value? (default=false) Since 3.0.132
* @return string
*
*/
- public function emailHeader($value) {
+ public function emailHeader($value, $headerName = false) {
if(!is_string($value)) return '';
- $a = array("\n", "\r", "", "", "0x0A", "0x0D", "%0A", "%0D", 'content-type:', 'bcc:', 'cc:', 'to:', 'reply-to:');
- return trim(str_ireplace($a, ' ', $value));
+ $a = array("\n", "\r", "", "", "0x0A", "0x0D", "%0A", "%0D"); // newlines
+ $value = trim(str_ireplace($a, ' ', stripslashes($value)));
+ if($headerName) $value = trim(preg_replace('/[^-_a-zA-Z0-9]/', '-', trim($value, ':')), '-');
+ return $value;
}
/**
@@ -1354,6 +1359,16 @@ class Sanitizer extends Wire {
/**
* Convert a string containing markup or entities to be plain text
*
+ * This is one implementation but there is also a better one that you may prefer with the
+ * `WireTextTools::markupToText()` method:
+ *
+ * ~~~~~
+ * $html = 'a bunch of HTML here';
+ * // try both to see what you prefer:
+ * $text1 = $sanitizer->markupToText($html);
+ * $text2 = $sanitizer->getTextTools()->markupToText($html);
+ * ~~~~~
+ *
* #pw-group-strings
*
* @param string $value String you want to convert
@@ -1363,6 +1378,7 @@ class Sanitizer extends Wire {
* - `entities` (bool): Entity encode returned value? (default=false).
* - `trim` (string): Character(s) to trim from beginning and end of value (default=" -,:;|\n\t").
* @return string Converted string of text
+ * @see WireTextTools::markupToText() for different though likely better (for most cases) implementation.
*
*/
public function markupToText($value, array $options = array()) {
diff --git a/wire/core/WireMail.php b/wire/core/WireMail.php
index 210d45e9..8bdd526f 100644
--- a/wire/core/WireMail.php
+++ b/wire/core/WireMail.php
@@ -52,6 +52,8 @@
*
* @method int send() Send email.
* @method string htmlToText($html) Convert HTML email body to TEXT email body.
+ * @method string sanitizeHeaderName($name) #pw-internal
+ * @method string sanitizeHeaderValue($value) #pw-internal
*
* @property array $to To email address.
* @property array $toName Optional person’s name to accompany “to” email address
@@ -67,6 +69,7 @@
* @property array $param Associative array of aditional params (likely not applicable to most WireMail modules).
* @property array $attachments Array of file attachments (if populated and where supported) #pw-advanced
* @property string $newline Newline character, populated only if different from CRLF. #pw-advanced
+ *
*
*/
@@ -160,14 +163,46 @@ class WireMail extends WireData implements WireMailInterface {
}
/**
- * Sanitize string for use in a email header
+ * Sanitize and normalize a header name
+ *
+ * @param string $name
+ * @return string
+ * @since 3.0.132
+ *
+ */
+ protected function ___sanitizeHeaderName($name) {
+ /** @var Sanitizer $sanitizer */
+ $sanitizer = $this->wire('sanitizer');
+ $name = $sanitizer->emailHeader($name, true);
+ // ensure consistent capitalization for header names
+ $name = ucwords(str_replace('-', ' ', $name));
+ $name = str_replace(' ', '-', $name);
+ return $name;
+ }
+
+ /**
+ * Sanitize an email header value
+ *
+ * @param string $value
+ * @return string
+ * @since 3.0.132
+ *
+ */
+ protected function ___sanitizeHeaderValue($value) {
+ return $this->wire('sanitizer')->emailHeader($value);
+ }
+
+ /**
+ * Alias of sanitizeHeaderValue() method for backwards compatibility
*
+ * #pw-internal
+ *
* @param string $header
* @return string
- *
+ *
*/
protected function sanitizeHeader($header) {
- return $this->wire('sanitizer')->emailHeader($header);
+ return $this->sanitizeHeaderValue($header);
}
/**
@@ -182,7 +217,7 @@ class WireMail extends WireData implements WireMailInterface {
if(strpos($email, '<') !== false && strpos($email, '>') !== false) {
// email has separate from name and email
if(preg_match('/^(.*?)<([^>]+)>.*$/', $email, $matches)) {
- $name = $this->sanitizeHeader($matches[1]);
+ $name = $this->sanitizeHeaderValue($matches[1]);
$email = $matches[2];
}
}
@@ -203,7 +238,7 @@ class WireMail extends WireData implements WireMailInterface {
protected function bundleEmailAndName($email, $name) {
$email = $this->sanitizeEmail($email);
if(!strlen($name)) return $email;
- $name = $this->sanitizeHeader($name);
+ $name = $this->sanitizeHeaderValue($name);
$delim = '';
if(strpos($name, ',') !== false) {
// name contains a comma, so quote the value
@@ -265,7 +300,7 @@ class WireMail extends WireData implements WireMailInterface {
$toEmail = $this->sanitizeEmail($toEmail);
if(strlen($toEmail)) {
$this->mail['to'][$toEmail] = $toEmail;
- $this->mail['toName'][$toEmail] = $this->sanitizeHeader($toName);
+ $this->mail['toName'][$toEmail] = $this->sanitizeHeaderValue($toName);
}
}
@@ -289,7 +324,7 @@ class WireMail extends WireData implements WireMailInterface {
$emails = $this->mail['to'];
if(!count($emails)) throw new WireException("Please set a 'to' address before setting a name.");
$email = end($emails);
- $this->mail['toName'][$email] = $this->sanitizeHeader($name);
+ $this->mail['toName'][$email] = $this->sanitizeHeaderValue($name);
return $this;
}
@@ -324,7 +359,7 @@ class WireMail extends WireData implements WireMailInterface {
*
*/
public function fromName($name) {
- $this->mail['fromName'] = $this->sanitizeHeader($name);
+ $this->mail['fromName'] = $this->sanitizeHeaderValue($name);
return $this;
}
@@ -343,7 +378,7 @@ class WireMail extends WireData implements WireMailInterface {
} else {
$email = $this->sanitizeEmail($email);
}
- if($name) $this->mail['replyToName'] = $this->sanitizeHeader($name);
+ if($name) $this->mail['replyToName'] = $this->sanitizeHeaderValue($name);
$this->mail['replyTo'] = $email;
if(empty($name) && !empty($this->mail['replyToName'])) $name = $this->mail['replyToName'];
if(strlen($name)) $email = $this->bundleEmailAndName($email, $name);
@@ -360,7 +395,7 @@ class WireMail extends WireData implements WireMailInterface {
*/
public function replyToName($name) {
if(strlen($this->mail['replyTo'])) return $this->replyTo($this->mail['replyTo'], $name);
- $this->mail['replyToName'] = $this->sanitizeHeader($name);
+ $this->mail['replyToName'] = $this->sanitizeHeaderValue($name);
return $this;
}
@@ -372,7 +407,7 @@ class WireMail extends WireData implements WireMailInterface {
*
*/
public function subject($subject) {
- $this->mail['subject'] = $this->sanitizeHeader($subject);
+ $this->mail['subject'] = $this->sanitizeHeaderValue($subject);
return $this;
}
@@ -430,15 +465,13 @@ class WireMail extends WireData implements WireMailInterface {
if(is_array($key)) {
$this->headers($key);
} else {
+ $key = $this->sanitizeHeaderName($key);
unset($this->mail['header'][$key]);
}
- } else {
- $k = $this->wire('sanitizer')->name($this->sanitizeHeader($key));
- // ensure consistent capitalization for all header keys
- $k = ucwords(str_replace('-', ' ', $k));
- $k = str_replace(' ', '-', $k);
- $v = $this->sanitizeHeader($value);
- $this->mail['header'][$k] = $v;
+ } else {
+ $key = $this->sanitizeHeaderName($key);
+ $value = $this->sanitizeHeaderValue($value);
+ if(strlen($key)) $this->mail['header'][$key] = $value;
}
return $this;
}
@@ -761,9 +794,7 @@ class WireMail extends WireData implements WireMailInterface {
*
*/
protected function ___htmlToText($html) {
- $textTools = new WireTextTools();
- $this->wire($textTools);
- $text = $textTools->markupToText($html);
+ $text = $this->wire('sanitizer')->getTextTools()->markupToText($html);
$text = str_replace("\n", "\r\n", $text);
$text = $this->strReplace($text, $this->multipartBoundary());
return $text;
diff --git a/wire/core/WireTextTools.php b/wire/core/WireTextTools.php
index bd4b8705..d9c02a4d 100644
--- a/wire/core/WireTextTools.php
+++ b/wire/core/WireTextTools.php
@@ -46,28 +46,44 @@ class WireTextTools extends Wire {
* - `splitBlocks` (string): String to split paragraph and header elements. (default="\n\n")
* - `convertEntities` (bool): Convert HTML entities to plain text equivalents? (default=true)
* - `listItemPrefix` (string): Prefix for converted list item `<li>` elements. (default='• ')
+ * - `linksToUrls` (bool): Convert links to "(url)" rather than removing entirely? (default=true) Since 3.0.132
+ * - `uppercaseHeadlines` (bool): Convert headline tags to uppercase? (default=false) Since 3.0.132
+ * - `underlineHeadlines` (bool): Underline headlines with "=" or "-"? (default=true) Since 3.0.132
+ * - `collapseSpaces` (bool): Collapse extra/redundant spaces to a single space? (default=true) Since 3.0.132
* - `replacements` (array): Associative array of strings to manually replace. (default=[' ' => ' '])
* @return string
*
*/
public function markupToText($str, array $options = array()) {
-
+
$defaults = array(
'keepTags' => array(),
+ 'linksToUrls' => true, // convert links to just URL rather than removing entirely
'splitBlocks' => "\n\n",
+ 'uppercaseHeadlines' => false,
+ 'underlineHeadlines' => true,
'convertEntities' => true,
'listItemPrefix' => '• ',
+ 'preIndent' => '', // indent for text within a <pre>
+ 'collapseSpaces' => true,
'replacements' => array(
' ' => ' '
),
+ 'finishReplacements' => array(), // replacements applied at very end (internal)
);
+ // merge options using arrays
+ foreach(array('replacements') as $key) {
+ if(!isset($options[$key])) continue;
+ $options[$key] = array_merge($defaults[$key], $options[$key]);
+ }
+
$options = array_merge($defaults, $options);
if(strpos($str, '>') !== false) {
// strip out everything up to and including </head>, if present
- if(strpos($str, '') !== false) list(, $str) = explode('', $str);
+ if(strpos($str, '') !== false) list(, $str) = explode('', $str);
// ensure tags are separated by whitespace
$str = str_replace('><', '> <', $str);
@@ -83,22 +99,79 @@ class WireTextTools extends Wire {
}
// ensure paragraphs and headers are followed by two newlines
- if(stripos($str, '
') || stripos($str, ')!i', '$1' . $options['splitBlocks'], $str);
+ if(stripos($str, ')!i', '$1' . $options['splitBlocks'], $str);
}
// ensure list items are on their own line and prefixed with a bullet
if(stripos($str, ']*>!i', "\n$prefix", $str);
+ $prefix = in_array('li', $options['keepTags']) ? '' : $options['listItemPrefix'];
+ $str = preg_replace('![\s\r\n]+]*>[\s\r\n]*!i', "\n$prefix", $str);
+ if($prefix) $options['replacements']["\n$prefix "] = "\n$prefix"; // prevent extra space
}
// convert <br> tags to be just a single newline
if(stripos($str, '
', '
', '
'), "
\n", $str);
+ $str = str_replace(array('
', '
', '
', ''), "
\n", $str);
while(stripos($str, "\n
") !== false) $str = str_replace("\n
", "
", $str);
while(stripos($str, "
\n\n") !== false) $str = str_replace("
\n\n", "
\n", $str);
}
+
+ // make headlines more prominent with underlines or uppercase
+ if(($options['uppercaseHeadlines'] || $options['underlineHeadlines']) && stripos($str, ']*>(.+?)\1>!is', $str, $matches)) {
+ foreach($matches[2] as $key => $headline) {
+ $fullMatch = $matches[0][$key];
+ $tagName = strtolower($matches[1][$key]);
+ $underline = '';
+ if($options['underlineHeadlines']) {
+ $char = $tagName === $topHtag ? '=' : '-';
+ $underline = "\n" . str_repeat($char, $this->strlen($headline));
+ }
+ if($options['uppercaseHeadlines']) $headline = strtoupper($headline);
+ $str = str_replace($fullMatch, "<$tagName>$headline$tagName>$underline", $str);
+ }
+ }
+ }
+
+ // convert "<a href=url>text</a>" tags to "text (url)"
+ if($options['linksToUrls'] && stripos($str, ']*href=([^\s>]+)[^<>]*>(.+?)!is', $str, $matches)) {
+ $links = array();
+ foreach($matches[0] as $key => $fullMatch) {
+ $href = trim($matches[1][$key], '"\'');
+ if(strpos($href, '#') === 0) continue; // do not convert jumplinks
+ $anchorText = $matches[2][$key];
+ $links[$fullMatch] = "$anchorText ($href)";
+ }
+ if(count($links)) {
+ $str = str_replace(array_keys($links), array_values($links), $str);
+ }
+ }
+ }
+
+ // indent within <pre>...</pre> sections
+ if(strlen($options['preIndent']) && strpos($str, '|\s[^>]*>)(.+?)
!is', $str, $matches)) {
+ foreach($matches[0] as $key => $fullMatch) {
+ $lines = explode("\n", $matches[1][$key]);
+ foreach($lines as $k => $line) {
+ $lines[$k] = ':preIndent:' . rtrim($line);
+ }
+ $str = str_replace($fullMatch, implode("\n", $lines), $str);
+ $options['finishReplacements'][':preIndent:'] = $options['preIndent'];
+ }
+ }
+ }
}
// strip tags
@@ -126,11 +199,20 @@ class WireTextTools extends Wire {
if($options['convertEntities'] && strpos($str, '&') !== false) {
$str = $this->wire('sanitizer')->unentities($str);
}
+
+ // collapse any redundant/extra whitespace
+ if($options['collapseSpaces']) {
+ while(strpos($str, ' ') !== false) $str = str_replace(' ', ' ', $str);
+ }
// normalize newlines and whitespace around newlines
while(strpos($str, " \n") !== false) $str = str_replace(" \n", "\n", $str);
while(strpos($str, "\n ") !== false) $str = str_replace("\n ", "\n", $str);
while(strpos($str, "\n\n\n") !== false) $str = str_replace("\n\n\n", "\n\n", $str);
+
+ if(count($options['finishReplacements'])) {
+ $str = str_replace(array_keys($options['finishReplacements']), array_values($options['finishReplacements']), $str);
+ }
return trim($str);
}