diff --git a/wire/core/Sanitizer.php b/wire/core/Sanitizer.php index 1d586f0c..e7ffeb5d 100644 --- a/wire/core/Sanitizer.php +++ b/wire/core/Sanitizer.php @@ -1093,18 +1093,23 @@ class Sanitizer extends Wire { } /** - * Returns a value that may be used in an email header + * Returns a value that may be used in an email header + * + * This method is designed to prevent one email header from injecting into another. * * #pw-group-strings * * @param string $value + * @param bool $headerName Sanitize a header name rather than header value? (default=false) Since 3.0.132 * @return string * */ - public function emailHeader($value) { + public function emailHeader($value, $headerName = false) { if(!is_string($value)) return ''; - $a = array("\n", "\r", "", "", "0x0A", "0x0D", "%0A", "%0D", 'content-type:', 'bcc:', 'cc:', 'to:', 'reply-to:'); - return trim(str_ireplace($a, ' ', $value)); + $a = array("\n", "\r", "", "", "0x0A", "0x0D", "%0A", "%0D"); // newlines + $value = trim(str_ireplace($a, ' ', stripslashes($value))); + if($headerName) $value = trim(preg_replace('/[^-_a-zA-Z0-9]/', '-', trim($value, ':')), '-'); + return $value; } /** @@ -1354,6 +1359,16 @@ class Sanitizer extends Wire { /** * Convert a string containing markup or entities to be plain text * + * This is one implementation but there is also a better one that you may prefer with the + * `WireTextTools::markupToText()` method: + * + * ~~~~~ + * $markup = 'a bunch of HTML here'; + * // try both to see what you prefer: + * $text1 = $sanitizer->markupToText($html); + * $text2 = $sanitizer->getTextTools()->markupToText(); + * ~~~~~ + * * #pw-group-strings * * @param string $value String you want to convert @@ -1363,6 +1378,7 @@ class Sanitizer extends Wire { * - `entities` (bool): Entity encode returned value? (default=false). * - `trim` (string): Character(s) to trim from beginning and end of value (default=" -,:;|\n\t"). 
* @return string Converted string of text + * @see WireTextTools::markupToText() for different though likely better (for most cases) implementation. * */ public function markupToText($value, array $options = array()) { diff --git a/wire/core/WireMail.php b/wire/core/WireMail.php index 210d45e9..8bdd526f 100644 --- a/wire/core/WireMail.php +++ b/wire/core/WireMail.php @@ -52,6 +52,8 @@ * * @method int send() Send email. * @method string htmlToText($html) Convert HTML email body to TEXT email body. + * @method string sanitizeHeaderName($name) #pw-internal + * @method string sanitizeHeaderValue($value) #pw-internal * * @property array $to To email address. * @property array $toName Optional person’s name to accompany “to” email address @@ -67,6 +69,7 @@ * @property array $param Associative array of aditional params (likely not applicable to most WireMail modules). * @property array $attachments Array of file attachments (if populated and where supported) #pw-advanced * @property string $newline Newline character, populated only if different from CRLF. 
#pw-advanced + * * */ @@ -160,14 +163,46 @@ class WireMail extends WireData implements WireMailInterface { } /** - * Sanitize string for use in a email header + * Sanitize and normalize a header name + * + * @param string $name + * @return string + * @since 3.0.132 + * + */ + protected function ___sanitizeHeaderName($name) { + /** @var Sanitizer $sanitizer */ + $sanitizer = $this->wire('sanitizer'); + $name = $sanitizer->emailHeader($name, true); + // ensure consistent capitalization for header names + $name = ucwords(str_replace('-', ' ', $name)); + $name = str_replace(' ', '-', $name); + return $name; + } + + /** + * Sanitize an email header header value + * + * @param string $value + * @return string + * @since 3.0.132 + * + */ + protected function ___sanitizeHeaderValue($value) { + return $this->wire('sanitizer')->emailHeader($value); + } + + /** + * Alias of sanitizeHeaderValue() method for backwards compatibility * + * #pw-internal + * * @param string $header * @return string - * + * */ protected function sanitizeHeader($header) { - return $this->wire('sanitizer')->emailHeader($header); + return $this->sanitizeHeaderValue($header); } /** @@ -182,7 +217,7 @@ class WireMail extends WireData implements WireMailInterface { if(strpos($email, '<') !== false && strpos($email, '>') !== false) { // email has separate from name and email if(preg_match('/^(.*?)<([^>]+)>.*$/', $email, $matches)) { - $name = $this->sanitizeHeader($matches[1]); + $name = $this->sanitizeHeaderValue($matches[1]); $email = $matches[2]; } } @@ -203,7 +238,7 @@ class WireMail extends WireData implements WireMailInterface { protected function bundleEmailAndName($email, $name) { $email = $this->sanitizeEmail($email); if(!strlen($name)) return $email; - $name = $this->sanitizeHeader($name); + $name = $this->sanitizeHeaderValue($name); $delim = ''; if(strpos($name, ',') !== false) { // name contains a comma, so quote the value @@ -265,7 +300,7 @@ class WireMail extends WireData implements 
WireMailInterface { $toEmail = $this->sanitizeEmail($toEmail); if(strlen($toEmail)) { $this->mail['to'][$toEmail] = $toEmail; - $this->mail['toName'][$toEmail] = $this->sanitizeHeader($toName); + $this->mail['toName'][$toEmail] = $this->sanitizeHeaderValue($toName); } } @@ -289,7 +324,7 @@ class WireMail extends WireData implements WireMailInterface { $emails = $this->mail['to']; if(!count($emails)) throw new WireException("Please set a 'to' address before setting a name."); $email = end($emails); - $this->mail['toName'][$email] = $this->sanitizeHeader($name); + $this->mail['toName'][$email] = $this->sanitizeHeaderValue($name); return $this; } @@ -324,7 +359,7 @@ class WireMail extends WireData implements WireMailInterface { * */ public function fromName($name) { - $this->mail['fromName'] = $this->sanitizeHeader($name); + $this->mail['fromName'] = $this->sanitizeHeaderValue($name); return $this; } @@ -343,7 +378,7 @@ class WireMail extends WireData implements WireMailInterface { } else { $email = $this->sanitizeEmail($email); } - if($name) $this->mail['replyToName'] = $this->sanitizeHeader($name); + if($name) $this->mail['replyToName'] = $this->sanitizeHeaderValue($name); $this->mail['replyTo'] = $email; if(empty($name) && !empty($this->mail['replyToName'])) $name = $this->mail['replyToName']; if(strlen($name)) $email = $this->bundleEmailAndName($email, $name); @@ -360,7 +395,7 @@ class WireMail extends WireData implements WireMailInterface { */ public function replyToName($name) { if(strlen($this->mail['replyTo'])) return $this->replyTo($this->mail['replyTo'], $name); - $this->mail['replyToName'] = $this->sanitizeHeader($name); + $this->mail['replyToName'] = $this->sanitizeHeaderValue($name); return $this; } @@ -372,7 +407,7 @@ class WireMail extends WireData implements WireMailInterface { * */ public function subject($subject) { - $this->mail['subject'] = $this->sanitizeHeader($subject); + $this->mail['subject'] = $this->sanitizeHeaderValue($subject); return 
$this; } @@ -430,15 +465,13 @@ class WireMail extends WireData implements WireMailInterface { if(is_array($key)) { $this->headers($key); } else { + $key = $this->sanitizeHeaderName($key); unset($this->mail['header'][$key]); } - } else { - $k = $this->wire('sanitizer')->name($this->sanitizeHeader($key)); - // ensure consistent capitalization for all header keys - $k = ucwords(str_replace('-', ' ', $k)); - $k = str_replace(' ', '-', $k); - $v = $this->sanitizeHeader($value); - $this->mail['header'][$k] = $v; + } else { + $key = $this->sanitizeHeaderName($key); + $value = $this->sanitizeHeaderValue($value); + if(strlen($key)) $this->mail['header'][$key] = $value; } return $this; } @@ -761,9 +794,7 @@ class WireMail extends WireData implements WireMailInterface { * */ protected function ___htmlToText($html) { - $textTools = new WireTextTools(); - $this->wire($textTools); - $text = $textTools->markupToText($html); + $text = $this->wire('sanitizer')->getTextTools()->markupToText($html); $text = str_replace("\n", "\r\n", $text); $text = $this->strReplace($text, $this->multipartBoundary()); return $text; diff --git a/wire/core/WireTextTools.php b/wire/core/WireTextTools.php index bd4b8705..d9c02a4d 100644 --- a/wire/core/WireTextTools.php +++ b/wire/core/WireTextTools.php @@ -46,28 +46,44 @@ class WireTextTools extends Wire { * - `splitBlocks` (string): String to split paragraph and header elements. (default="\n\n") * - `convertEntities` (bool): Convert HTML entities to plain text equivalents? (default=true) * - `listItemPrefix` (string): Prefix for converted list item `
  • ` elements. (default='• ') + * - `linksToUrls` (bool): Convert links to "(url)" rather than removing entirely? (default=true) Since 3.0.132 + * - `uppercaseHeadlines` (bool): Convert headline tags to uppercase? (default=false) Since 3.0.132 + * - `underlineHeadlines` (bool): Underline headlines with "=" or "-"? (default=true) Since 3.0.132 + * - `collapseSpaces` (bool): Collapse extra/redundant extra spaces to single space? (default=true) Since 3.0.132 * - `replacements` (array): Associative array of strings to manually replace. (default=[' ' => ' ']) * @return string * */ public function markupToText($str, array $options = array()) { - + $defaults = array( 'keepTags' => array(), + 'linksToUrls' => true, // convert links to just URL rather than removing entirely 'splitBlocks' => "\n\n", + 'uppercaseHeadlines' => false, + 'underlineHeadlines' => true, 'convertEntities' => true, 'listItemPrefix' => '• ', + 'preIndent' => '', // indent for text within a
    +			'collapseSpaces' => true,
     			'replacements' => array(
     				'&nbsp;' => ' '
     			),
    +			'finishReplacements' => array(), // replacements applied at very end (internal)
     		);
     
    +		// merge options using arrays
    +		foreach(array('replacements') as $key) {
    +			if(!isset($options[$key])) continue;
    +			$options[$key] = array_merge($defaults[$key], $options[$key]);
    +		}
    +		
     		$options = array_merge($defaults, $options);
     
     		if(strpos($str, '>') !== false) {
     
     			// strip out everything up to and including </head>, if present
    -			if(strpos($str, '</head>') !== false) list(, $str) = explode('</head>', $str); 
    +			if(strpos($str, '</head>') !== false) list(, $str) = explode('</head>', $str);
     
     			// ensure tags are separated by whitespace
     			$str = str_replace('><', '> <', $str);
    @@ -83,22 +99,79 @@ class WireTextTools extends Wire {
     			}
     
     			// ensure paragraphs and headers are followed by two newlines
    -			if(stripos($str, '

    ') || stripos($str, ')!i', '$1' . $options['splitBlocks'], $str); + if(stripos($str, ')!i', '$1' . $options['splitBlocks'], $str); } // ensure list items are on their own line and prefixed with a bullet if(stripos($str, ']*>!i', "\n
  • $prefix", $str); + $prefix = in_array('li', $options['keepTags']) ? '' : $options['listItemPrefix']; + $str = preg_replace('![\s\r\n]+]*>[\s\r\n]*!i', "\n
  • $prefix", $str); + if($prefix) $options['replacements']["\n$prefix "] = "\n$prefix"; // prevent extra space } // convert
    tags to be just a single newline if(stripos($str, '', '
    ', '
    '), "
    \n", $str); + $str = str_replace(array('
    ', '
    ', '
    ', '
  • '), "
    \n", $str); while(stripos($str, "\n
    ") !== false) $str = str_replace("\n
    ", "
    ", $str); while(stripos($str, "
    \n\n") !== false) $str = str_replace("
    \n\n", "
    \n", $str); } + + // make headlines more prominent with underlines or uppercase + if(($options['uppercaseHeadlines'] || $options['underlineHeadlines']) && stripos($str, ']*>(.+?)!is', $str, $matches)) { + foreach($matches[2] as $key => $headline) { + $fullMatch = $matches[0][$key]; + $tagName = strtolower($matches[1][$key]); + $underline = ''; + if($options['underlineHeadlines']) { + $char = $tagName === $topHtag ? '=' : '-'; + $underline = "\n" . str_repeat($char, $this->strlen($headline)); + } + if($options['uppercaseHeadlines']) $headline = strtoupper($headline); + $str = str_replace($fullMatch, "<$tagName>$headline$underline", $str); + } + } + } + + // convert "text" tags to "text (url)" + if($options['linksToUrls'] && stripos($str, ']*href=([^\s>]+)[^<>]*>(.+?)!is', $str, $matches)) { + $links = array(); + foreach($matches[0] as $key => $fullMatch) { + $href = trim($matches[1][$key], '"\''); + if(strpos($href, '#') === 0) continue; // do not convert jumplinks + $anchorText = $matches[2][$key]; + $links[$fullMatch] = "$anchorText ($href)"; + } + if(count($links)) { + $str = str_replace(array_keys($links), array_values($links), $str); + } + } + } + + // indent within
    ...
    sections + if(strlen($options['preIndent']) && strpos($str, '|\s[^>]*>)(.+?)
    !is', $str, $matches)) { + foreach($matches[0] as $key => $fullMatch) { + $lines = explode("\n", $matches[1][$key]); + foreach($lines as $k => $line) { + $lines[$k] = ':preIndent:' . rtrim($line); + } + $str = str_replace($fullMatch, implode("\n", $lines), $str); + $options['finishReplacements'][':preIndent:'] = $options['preIndent']; + } + } + } } // strip tags @@ -126,11 +199,20 @@ class WireTextTools extends Wire { if($options['convertEntities'] && strpos($str, '&') !== false) { $str = $this->wire('sanitizer')->unentities($str); } + + // collapse any redundant/extra whitespace + if($options['collapseSpaces']) { + while(strpos($str, ' ') !== false) $str = str_replace(' ', ' ', $str); + } // normalize newlines and whitespace around newlines while(strpos($str, " \n") !== false) $str = str_replace(" \n", "\n", $str); while(strpos($str, "\n ") !== false) $str = str_replace("\n ", "\n", $str); while(strpos($str, "\n\n\n") !== false) $str = str_replace("\n\n\n", "\n\n", $str); + + if(count($options['finishReplacements'])) { + $str = str_replace(array_keys($options['finishReplacements']), array_values($options['finishReplacements']), $str); + } return trim($str); }