From 0537365cac67f8b9d8b9f8d10459de1488569372 Mon Sep 17 00:00:00 2001 From: rxu Date: Tue, 23 Nov 2021 15:12:43 +0700 Subject: [PATCH] [ticket/16912] Rewrite email headers encoding method Based on symfony/polyfill-iconv implementations. PHPBB3-16912 --- phpBB/includes/functions_messenger.php | 98 +++++++++++++++----------- 1 file changed, 58 insertions(+), 40 deletions(-) diff --git a/phpBB/includes/functions_messenger.php b/phpBB/includes/functions_messenger.php index b7f3eeda36..52c205ceb5 100644 --- a/phpBB/includes/functions_messenger.php +++ b/phpBB/includes/functions_messenger.php @@ -1840,66 +1840,84 @@ class smtp_class } /** -* Encodes the given string for proper display in UTF-8. -* -* This version is using base64 encoded data. The downside of this -* is if the mail client does not understand this encoding the user -* is basically doomed with an unreadable subject. -* -* Please note that this version fully supports RFC 2045 section 6.8. -* -* @param string $str -* @param string $eol End of line we are using (optional to be backwards compatible) -*/ + * Encodes the given string for proper display in UTF-8 or US-ASCII. + * + * This version is based on iconv_mime_encode() implementation + * from symfomy/polyfill-iconv + * https://github.com/symfony/polyfill-iconv/blob/fd324208ec59a39ebe776e6e9ec5540ad4f40aaa/Iconv.php#L355 + * + * @param string $str + * @param string $eol End of line we are using (optional to be backwards compatible) + * + * @return string + */ function mail_encode($str, $eol = "\r\n") { // Check if string contains ASCII only characters $is_ascii = strlen($str) === utf8_strlen($str); - // Define start delimimter, end delimiter and spacer + $scheme = $is_ascii ? "Q" : "B"; + + // Define start delimimter, end delimiter // Use the Quoted-Printable encoding for ASCII strings to avoid unnecessary encoding in Base64 - $start = $is_ascii ? "=?US-ASCII?Q?" : "=?UTF-8?B?"; + $start = $is_ascii ? "=?US-ASCII?$scheme?" : "=?UTF-8?$scheme?"; $end = "?="; - $delimiter = "$eol "; // Maximum encoded-word length is 75 as per RFC 2047 section 2. // $split_length *must* be a multiple of 4, but <= 75 - strlen($start . $delimiter . $end)!!! - $split_length = 75 - strlen($start . $delimiter . $end); + $split_length = 75 - strlen($start . $eol . $end); $split_length = $split_length - $split_length % 4; - // Use the Quoted-Printable encoding for ASCII strings to avoid unnecessary encoding in Base64 - // quoted_printable_encode() splits lines at length of 75 characters with =\r\n delimiter, amend this feature - $encoded_str = $is_ascii ? str_replace("=\r\n", '', quoted_printable_encode($str)) : base64_encode($str); + $line_length = \strlen($start) + \strlen($end); + $line_offset = \strlen($start) + 1; + $line_data = ''; - // If encoded string meets the limits, we just return with the correct data. - if (strlen($encoded_str) <= $split_length) + $is_quoted_printable = 'Q' === $scheme; + + preg_match_all('/./us', $str, $chars); + $chars = $chars[0] ?? []; + + $str = []; + foreach ($chars as $char) { - return $start . $encoded_str . $end; - } + $encoded_char = $is_quoted_printable + ? $char = preg_replace_callback( + '/[=_\?\x20\x00-\x1F\x80-\xFF]/', + function ($matches) + { + $hex = dechex(\ord($matches[0])); + $hex = strlen($hex) == 1 ? "0$hex" : $hex; + return '=' . strtoupper($hex); + }, + $char + ) + : base64_encode($line_data . $char); - // If there is only ASCII data, we just return what we want, correctly splitting the lines. - if ($is_ascii) - { - return $start . implode($end . $delimiter . $start, str_split($encoded_str, $split_length)) . $end; - } - - // UTF-8 data, compose encoded lines - $array = utf8_str_split($str); - $str = ''; - - while (count($array)) - { - $text = ''; - - while (count($array) && intval((strlen($text . $array[0]) + 2) / 3) << 2 <= $split_length) + if (isset($encoded_char[$split_length - $line_length])) { - $text .= array_shift($array); + if (!$is_quoted_printable) + { + $line_data = base64_encode($line_data); + } + $str[] = $start . $line_data . $end; + $line_length = $line_offset; + $line_data = ''; } - $str .= $start . base64_encode($text) . $end . $delimiter; + $line_data .= $char; + $is_quoted_printable && $line_length += \strlen($char); } - return substr($str, 0, -strlen($delimiter)); + if ('' !== $line_data) + { + if (!$is_quoted_printable) + { + $line_data = base64_encode($line_data); + } + $str[] = $start . $line_data . $end; + } + + return implode($eol . ' ', $str); } /**