mirror of
https://github.com/processwire/processwire.git
synced 2025-08-15 11:14:12 +02:00
Several major improvements to WireTextTools::markupToText() method, plus related updates in Sanitizer and WireMail classes
This commit is contained in:
@@ -1093,18 +1093,23 @@ class Sanitizer extends Wire {
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a value that may be used in an email header
|
||||
* Returns a value that may be used in an email header
|
||||
*
|
||||
* This method is designed to prevent one email header from injecting into another.
|
||||
*
|
||||
* #pw-group-strings
|
||||
*
|
||||
* @param string $value
|
||||
* @param bool $headerName Sanitize a header name rather than header value? (default=false) Since 3.0.132
|
||||
* @return string
|
||||
*
|
||||
*/
|
||||
public function emailHeader($value) {
|
||||
public function emailHeader($value, $headerName = false) {
|
||||
if(!is_string($value)) return '';
|
||||
$a = array("\n", "\r", "<CR>", "<LF>", "0x0A", "0x0D", "%0A", "%0D", 'content-type:', 'bcc:', 'cc:', 'to:', 'reply-to:');
|
||||
return trim(str_ireplace($a, ' ', $value));
|
||||
$a = array("\n", "\r", "<CR>", "<LF>", "0x0A", "0x0D", "%0A", "%0D"); // newlines
|
||||
$value = trim(str_ireplace($a, ' ', stripslashes($value)));
|
||||
if($headerName) $value = trim(preg_replace('/[^-_a-zA-Z0-9]/', '-', trim($value, ':')), '-');
|
||||
return $value;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1354,6 +1359,16 @@ class Sanitizer extends Wire {
|
||||
/**
|
||||
* Convert a string containing markup or entities to be plain text
|
||||
*
|
||||
* This is one implementation but there is also a better one that you may prefer with the
|
||||
* `WireTextTools::markupToText()` method:
|
||||
*
|
||||
* ~~~~~
|
||||
* $markup = '<html>a bunch of HTML here</html>';
|
||||
* // try both to see what you prefer:
|
||||
* $text1 = $sanitizer->markupToText($markup);
|
||||
* $text2 = $sanitizer->getTextTools()->markupToText($markup);
|
||||
* ~~~~~
|
||||
*
|
||||
* #pw-group-strings
|
||||
*
|
||||
* @param string $value String you want to convert
|
||||
@@ -1363,6 +1378,7 @@ class Sanitizer extends Wire {
|
||||
* - `entities` (bool): Entity encode returned value? (default=false).
|
||||
* - `trim` (string): Character(s) to trim from beginning and end of value (default=" -,:;|\n\t").
|
||||
* @return string Converted string of text
|
||||
* @see WireTextTools::markupToText() for a different, though likely better (for most cases), implementation.
|
||||
*
|
||||
*/
|
||||
public function markupToText($value, array $options = array()) {
|
||||
|
@@ -52,6 +52,8 @@
|
||||
*
|
||||
* @method int send() Send email.
|
||||
* @method string htmlToText($html) Convert HTML email body to TEXT email body.
|
||||
* @method string sanitizeHeaderName($name) #pw-internal
|
||||
* @method string sanitizeHeaderValue($value) #pw-internal
|
||||
*
|
||||
* @property array $to To email address.
|
||||
* @property array $toName Optional person’s name to accompany “to” email address
|
||||
@@ -67,6 +69,7 @@
|
||||
* @property array $param Associative array of additional params (likely not applicable to most WireMail modules).
|
||||
* @property array $attachments Array of file attachments (if populated and where supported) #pw-advanced
|
||||
* @property string $newline Newline character, populated only if different from CRLF. #pw-advanced
|
||||
*
|
||||
*
|
||||
*/
|
||||
|
||||
@@ -160,14 +163,46 @@ class WireMail extends WireData implements WireMailInterface {
|
||||
}
|
||||
|
||||
/**
|
||||
* Sanitize string for use in a email header
|
||||
* Sanitize and normalize a header name
|
||||
*
|
||||
* @param string $name
|
||||
* @return string
|
||||
* @since 3.0.132
|
||||
*
|
||||
*/
|
||||
protected function ___sanitizeHeaderName($name) {
|
||||
/** @var Sanitizer $sanitizer */
|
||||
$sanitizer = $this->wire('sanitizer');
|
||||
$name = $sanitizer->emailHeader($name, true);
|
||||
// ensure consistent capitalization for header names
|
||||
$name = ucwords(str_replace('-', ' ', $name));
|
||||
$name = str_replace(' ', '-', $name);
|
||||
return $name;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sanitize an email header value
|
||||
*
|
||||
* @param string $value
|
||||
* @return string
|
||||
* @since 3.0.132
|
||||
*
|
||||
*/
|
||||
protected function ___sanitizeHeaderValue($value) {
|
||||
return $this->wire('sanitizer')->emailHeader($value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Alias of sanitizeHeaderValue() method for backwards compatibility
|
||||
*
|
||||
* #pw-internal
|
||||
*
|
||||
* @param string $header
|
||||
* @return string
|
||||
*
|
||||
*
|
||||
*/
|
||||
protected function sanitizeHeader($header) {
|
||||
return $this->wire('sanitizer')->emailHeader($header);
|
||||
return $this->sanitizeHeaderValue($header);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -182,7 +217,7 @@ class WireMail extends WireData implements WireMailInterface {
|
||||
if(strpos($email, '<') !== false && strpos($email, '>') !== false) {
|
||||
// email has separate from name and email
|
||||
if(preg_match('/^(.*?)<([^>]+)>.*$/', $email, $matches)) {
|
||||
$name = $this->sanitizeHeader($matches[1]);
|
||||
$name = $this->sanitizeHeaderValue($matches[1]);
|
||||
$email = $matches[2];
|
||||
}
|
||||
}
|
||||
@@ -203,7 +238,7 @@ class WireMail extends WireData implements WireMailInterface {
|
||||
protected function bundleEmailAndName($email, $name) {
|
||||
$email = $this->sanitizeEmail($email);
|
||||
if(!strlen($name)) return $email;
|
||||
$name = $this->sanitizeHeader($name);
|
||||
$name = $this->sanitizeHeaderValue($name);
|
||||
$delim = '';
|
||||
if(strpos($name, ',') !== false) {
|
||||
// name contains a comma, so quote the value
|
||||
@@ -265,7 +300,7 @@ class WireMail extends WireData implements WireMailInterface {
|
||||
$toEmail = $this->sanitizeEmail($toEmail);
|
||||
if(strlen($toEmail)) {
|
||||
$this->mail['to'][$toEmail] = $toEmail;
|
||||
$this->mail['toName'][$toEmail] = $this->sanitizeHeader($toName);
|
||||
$this->mail['toName'][$toEmail] = $this->sanitizeHeaderValue($toName);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -289,7 +324,7 @@ class WireMail extends WireData implements WireMailInterface {
|
||||
$emails = $this->mail['to'];
|
||||
if(!count($emails)) throw new WireException("Please set a 'to' address before setting a name.");
|
||||
$email = end($emails);
|
||||
$this->mail['toName'][$email] = $this->sanitizeHeader($name);
|
||||
$this->mail['toName'][$email] = $this->sanitizeHeaderValue($name);
|
||||
return $this;
|
||||
}
|
||||
|
||||
@@ -324,7 +359,7 @@ class WireMail extends WireData implements WireMailInterface {
|
||||
*
|
||||
*/
|
||||
public function fromName($name) {
|
||||
$this->mail['fromName'] = $this->sanitizeHeader($name);
|
||||
$this->mail['fromName'] = $this->sanitizeHeaderValue($name);
|
||||
return $this;
|
||||
}
|
||||
|
||||
@@ -343,7 +378,7 @@ class WireMail extends WireData implements WireMailInterface {
|
||||
} else {
|
||||
$email = $this->sanitizeEmail($email);
|
||||
}
|
||||
if($name) $this->mail['replyToName'] = $this->sanitizeHeader($name);
|
||||
if($name) $this->mail['replyToName'] = $this->sanitizeHeaderValue($name);
|
||||
$this->mail['replyTo'] = $email;
|
||||
if(empty($name) && !empty($this->mail['replyToName'])) $name = $this->mail['replyToName'];
|
||||
if(strlen($name)) $email = $this->bundleEmailAndName($email, $name);
|
||||
@@ -360,7 +395,7 @@ class WireMail extends WireData implements WireMailInterface {
|
||||
*/
|
||||
public function replyToName($name) {
|
||||
if(strlen($this->mail['replyTo'])) return $this->replyTo($this->mail['replyTo'], $name);
|
||||
$this->mail['replyToName'] = $this->sanitizeHeader($name);
|
||||
$this->mail['replyToName'] = $this->sanitizeHeaderValue($name);
|
||||
return $this;
|
||||
}
|
||||
|
||||
@@ -372,7 +407,7 @@ class WireMail extends WireData implements WireMailInterface {
|
||||
*
|
||||
*/
|
||||
public function subject($subject) {
|
||||
$this->mail['subject'] = $this->sanitizeHeader($subject);
|
||||
$this->mail['subject'] = $this->sanitizeHeaderValue($subject);
|
||||
return $this;
|
||||
}
|
||||
|
||||
@@ -430,15 +465,13 @@ class WireMail extends WireData implements WireMailInterface {
|
||||
if(is_array($key)) {
|
||||
$this->headers($key);
|
||||
} else {
|
||||
$key = $this->sanitizeHeaderName($key);
|
||||
unset($this->mail['header'][$key]);
|
||||
}
|
||||
} else {
|
||||
$k = $this->wire('sanitizer')->name($this->sanitizeHeader($key));
|
||||
// ensure consistent capitalization for all header keys
|
||||
$k = ucwords(str_replace('-', ' ', $k));
|
||||
$k = str_replace(' ', '-', $k);
|
||||
$v = $this->sanitizeHeader($value);
|
||||
$this->mail['header'][$k] = $v;
|
||||
} else {
|
||||
$key = $this->sanitizeHeaderName($key);
|
||||
$value = $this->sanitizeHeaderValue($value);
|
||||
if(strlen($key)) $this->mail['header'][$key] = $value;
|
||||
}
|
||||
return $this;
|
||||
}
|
||||
@@ -761,9 +794,7 @@ class WireMail extends WireData implements WireMailInterface {
|
||||
*
|
||||
*/
|
||||
protected function ___htmlToText($html) {
|
||||
$textTools = new WireTextTools();
|
||||
$this->wire($textTools);
|
||||
$text = $textTools->markupToText($html);
|
||||
$text = $this->wire('sanitizer')->getTextTools()->markupToText($html);
|
||||
$text = str_replace("\n", "\r\n", $text);
|
||||
$text = $this->strReplace($text, $this->multipartBoundary());
|
||||
return $text;
|
||||
|
@@ -46,28 +46,44 @@ class WireTextTools extends Wire {
|
||||
* - `splitBlocks` (string): String to split paragraph and header elements. (default="\n\n")
|
||||
* - `convertEntities` (bool): Convert HTML entities to plain text equivalents? (default=true)
|
||||
* - `listItemPrefix` (string): Prefix for converted list item `<li>` elements. (default='• ')
|
||||
* - `linksToUrls` (bool): Convert links to "(url)" rather than removing entirely? (default=true) Since 3.0.132
|
||||
* - `uppercaseHeadlines` (bool): Convert headline tags to uppercase? (default=false) Since 3.0.132
|
||||
* - `underlineHeadlines` (bool): Underline headlines with "=" or "-"? (default=true) Since 3.0.132
|
||||
* - `collapseSpaces` (bool): Collapse redundant extra spaces to a single space? (default=true) Since 3.0.132
|
||||
* - `replacements` (array): Associative array of strings to manually replace. (default=['&nbsp;' => ' '])
|
||||
* @return string
|
||||
*
|
||||
*/
|
||||
public function markupToText($str, array $options = array()) {
|
||||
|
||||
|
||||
$defaults = array(
|
||||
'keepTags' => array(),
|
||||
'linksToUrls' => true, // convert links to just URL rather than removing entirely
|
||||
'splitBlocks' => "\n\n",
|
||||
'uppercaseHeadlines' => false,
|
||||
'underlineHeadlines' => true,
|
||||
'convertEntities' => true,
|
||||
'listItemPrefix' => '• ',
|
||||
'preIndent' => '', // indent for text within a <pre>
|
||||
'collapseSpaces' => true,
|
||||
'replacements' => array(
|
||||
' ' => ' '
|
||||
),
|
||||
'finishReplacements' => array(), // replacements applied at very end (internal)
|
||||
);
|
||||
|
||||
// merge options using arrays
|
||||
foreach(array('replacements') as $key) {
|
||||
if(!isset($options[$key])) continue;
|
||||
$options[$key] = array_merge($defaults[$key], $options[$key]);
|
||||
}
|
||||
|
||||
$options = array_merge($defaults, $options);
|
||||
|
||||
if(strpos($str, '>') !== false) {
|
||||
|
||||
// strip out everything up to and including </head>, if present
|
||||
if(strpos($str, '</head>') !== false) list(, $str) = explode('</head>', $str);
|
||||
if(strpos($str, '</head>') !== false) list(, $str) = explode('</head>', $str);
|
||||
|
||||
// ensure tags are separated by whitespace
|
||||
$str = str_replace('><', '> <', $str);
|
||||
@@ -83,22 +99,79 @@ class WireTextTools extends Wire {
|
||||
}
|
||||
|
||||
// ensure paragraphs and headers are followed by two newlines
|
||||
if(stripos($str, '</p>') || stripos($str, '</h')) {
|
||||
$str = preg_replace('!(</(?:p|h\d)>)!i', '$1' . $options['splitBlocks'], $str);
|
||||
if(stripos($str, '</p') || stripos($str, '</h') || stripos($str, '</li') || stripos($str, '</bl') || stripos($str, '</div')) {
|
||||
$str = preg_replace('!(</(?:p|h\d|ul|ol|pre|blockquote|div)>)!i', '$1' . $options['splitBlocks'], $str);
|
||||
}
|
||||
|
||||
// ensure list items are on their own line and prefixed with a bullet
|
||||
if(stripos($str, '<li') !== false) {
|
||||
$prefix = in_array('li', $options['keepTags']) ? '' : $options['listItemPrefix'];
|
||||
$str = preg_replace('![\s\r\n]+<li[^>]*>!i', "\n<li>$prefix", $str);
|
||||
$prefix = in_array('li', $options['keepTags']) ? '' : $options['listItemPrefix'];
|
||||
$str = preg_replace('![\s\r\n]+<li[^>]*>[\s\r\n]*!i', "\n<li>$prefix", $str);
|
||||
if($prefix) $options['replacements']["\n$prefix "] = "\n$prefix"; // prevent extra space
|
||||
}
|
||||
|
||||
// convert <br> tags to be just a single newline
|
||||
if(stripos($str, '<br') !== false) {
|
||||
$str = str_replace(array('<br>', '<br/>', '<br />'), "<br>\n", $str);
|
||||
$str = str_replace(array('<br>', '<br/>', '<br />', '</li>'), "<br>\n", $str);
|
||||
while(stripos($str, "\n<br>") !== false) $str = str_replace("\n<br>", "<br>", $str);
|
||||
while(stripos($str, "<br>\n\n") !== false) $str = str_replace("<br>\n\n", "<br>\n", $str);
|
||||
}
|
||||
|
||||
// make headlines more prominent with underlines or uppercase
|
||||
if(($options['uppercaseHeadlines'] || $options['underlineHeadlines']) && stripos($str, '<h') !== false) {
|
||||
$topHtag = '';
|
||||
if($options['underlineHeadlines']) {
|
||||
// determine which is the top level headline tag
|
||||
for($n = 1; $n <= 6; $n++) {
|
||||
if(stripos($str, "<h$n") === false) continue;
|
||||
$topHtag = "h$n";
|
||||
break;
|
||||
}
|
||||
}
|
||||
if(preg_match_all('!<(h[123456])[^>]*>(.+?)</\1>!is', $str, $matches)) {
|
||||
foreach($matches[2] as $key => $headline) {
|
||||
$fullMatch = $matches[0][$key];
|
||||
$tagName = strtolower($matches[1][$key]);
|
||||
$underline = '';
|
||||
if($options['underlineHeadlines']) {
|
||||
$char = $tagName === $topHtag ? '=' : '-';
|
||||
$underline = "\n" . str_repeat($char, $this->strlen($headline));
|
||||
}
|
||||
if($options['uppercaseHeadlines']) $headline = strtoupper($headline);
|
||||
$str = str_replace($fullMatch, "<$tagName>$headline</$tagName>$underline", $str);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// convert "<a href='url'>text</a>" tags to "text (url)"
|
||||
if($options['linksToUrls'] && stripos($str, '<a ') !== false) {
|
||||
if(preg_match_all('!<a\s[^<>]*href=([^\s>]+)[^<>]*>(.+?)</a>!is', $str, $matches)) {
|
||||
$links = array();
|
||||
foreach($matches[0] as $key => $fullMatch) {
|
||||
$href = trim($matches[1][$key], '"\'');
|
||||
if(strpos($href, '#') === 0) continue; // do not convert jumplinks
|
||||
$anchorText = $matches[2][$key];
|
||||
$links[$fullMatch] = "$anchorText ($href)";
|
||||
}
|
||||
if(count($links)) {
|
||||
$str = str_replace(array_keys($links), array_values($links), $str);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// indent within <pre>...</pre> sections
|
||||
if(strlen($options['preIndent']) && strpos($str, '<pre') !== false) {
|
||||
if(preg_match_all('!<pre(?:>|\s[^>]*>)(.+?)</pre>!is', $str, $matches)) {
|
||||
foreach($matches[0] as $key => $fullMatch) {
|
||||
$lines = explode("\n", $matches[1][$key]);
|
||||
foreach($lines as $k => $line) {
|
||||
$lines[$k] = ':preIndent:' . rtrim($line);
|
||||
}
|
||||
$str = str_replace($fullMatch, implode("\n", $lines), $str);
|
||||
$options['finishReplacements'][':preIndent:'] = $options['preIndent'];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// strip tags
|
||||
@@ -126,11 +199,20 @@ class WireTextTools extends Wire {
|
||||
if($options['convertEntities'] && strpos($str, '&') !== false) {
|
||||
$str = $this->wire('sanitizer')->unentities($str);
|
||||
}
|
||||
|
||||
// collapse any redundant/extra whitespace
|
||||
if($options['collapseSpaces']) {
|
||||
while(strpos($str, ' ') !== false) $str = str_replace(' ', ' ', $str);
|
||||
}
|
||||
|
||||
// normalize newlines and whitespace around newlines
|
||||
while(strpos($str, " \n") !== false) $str = str_replace(" \n", "\n", $str);
|
||||
while(strpos($str, "\n ") !== false) $str = str_replace("\n ", "\n", $str);
|
||||
while(strpos($str, "\n\n\n") !== false) $str = str_replace("\n\n\n", "\n\n", $str);
|
||||
|
||||
if(count($options['finishReplacements'])) {
|
||||
$str = str_replace(array_keys($options['finishReplacements']), array_values($options['finishReplacements']), $str);
|
||||
}
|
||||
|
||||
return trim($str);
|
||||
}
|
||||
|
Reference in New Issue
Block a user