diff --git a/wire/core/Sanitizer.php b/wire/core/Sanitizer.php
index 6dfa1f1c..0ab1c3a9 100644
--- a/wire/core/Sanitizer.php
+++ b/wire/core/Sanitizer.php
@@ -63,6 +63,12 @@ class Sanitizer extends Wire {
*/
protected $allowedASCII = array();
+ /**
+ * @var null|WireTextTools
+ *
+ */
+ protected $textTools = null;
+
/**
* Construct the sanitizer
*
@@ -1809,9 +1815,70 @@ class Sanitizer extends Wire {
* @return string String without newlines
*
*/
- function removeNewlines($str, $replacement = ' ') {
+ public function removeNewlines($str, $replacement = ' ') {
+ // "\r\n" is first in the search array so that a CRLF pair is replaced by a single $replacement, not two
return str_replace(array("\r\n", "\r", "\n"), $replacement, $str);
}
+
+ /**
+ * Truncate string to given maximum length without breaking words
+ *
+ * This method can truncate between words, sentences, punctuation or blocks (like paragraphs).
+ * See the `type` option for details on how it should truncate. By default it truncates between
+ * words. Description of types:
+ *
+ * - word: truncate to closest word.
+ * - punctuation: truncate to closest punctuation within sentence.
+ * - sentence: truncate to closest sentence.
+ * - block: truncate to closest block of text (like a paragraph or headline).
+ *
+ * Note that if your specified `type` is something other than “word”, and it cannot be matched
+ * within the maxLength, then it will attempt a different type. For instance, if you specify
+ * “sentence” as the type, and it cannot match a sentence, it will try to match to “punctuation”
+ * instead. If it cannot match that, then it will attempt “word”.
+ *
+	 * HTML will be stripped from the returned string. If you want to keep some tags, use the `keepTags` or `keepFormatTags`
+	 * options to specify which tags are allowed to remain. When the `keepFormatTags` option is true, all HTML inline
+	 * text formatting tags are retained.
+ *
+ * ~~~~~~~
+ * // Truncate string to closest word within 150 characters
+ * $s = $sanitizer->truncate($str, 150);
+ *
+ * // Truncate string to closest sentence within 300 characters
+ * $s = $sanitizer->truncate($str, 300, 'sentence');
+ *
+ * // Truncate with options
+ * $s = $sanitizer->truncate($str, [
+ * 'type' => 'punctuation',
+ * 'maxLength' => 300,
+ * 'visible' => true,
+ * 'more' => '…'
+ * ]);
+ * ~~~~~~~
+ *
+ * @param string $str String to truncate
+ * @param int|array $maxLength Maximum length of returned string, or specify $options array here.
+ * @param array|string $options Options array, or specify `type` option (string).
+ * - `type` (string): Preferred truncation type of word, punctuation, sentence, or block. (default='word')
+ * This is a “preferred type”, not an absolute one, because it will adjust to match what it can within your maxLength.
+ * - `maxLength` (int): Max characters for truncation, used only if $options array substituted for $maxLength argument.
+ * - `maximize` (bool): Include as much as possible within specified type and max-length? (default=true)
+	 *    If you specify false for the maximize option, it will truncate to first word, punctuation, sentence or block.
+ * - `visible` (bool): When true, invisible text (markup, entities, etc.) does not count towards string length. (default=false)
+ * - `trim` (string): Characters to trim from returned string. (default=',;/ ')
+ * - `noTrim` (string): Never trim these from end of returned string. (default=')]>}”»')
+ * - `more` (string): Append this to truncated strings that do not end with sentence punctuation. (default='…')
+ * - `keepTags` (array): HTML tags that should be kept in returned string. (default=[])
+ * - `keepFormatTags` (bool): Keep HTML text-formatting tags? Simpler alternative to keepTags option. (default=false)
+ * - `collapseLinesWith` (string): String to collapse lines with where the first is not punctuated. (default=' … ')
+ * - `convertEntities` (bool): Convert HTML entities to non-entity characters? (default=false)
+ * - `noEndSentence` (string): Strings that sentence may not end with, space-separated values (default='Mr. Mrs. …')
+ * @return string
+ *
+ */
+	public function truncate($str, $maxLength = 300, $options = array()) {
+		// all of the work is delegated to the WireTextTools instance returned by getTextTools()
+		return $this->getTextTools()->truncate($str, $maxLength, $options);
+	}
/**
* Removes 4-byte UTF-8 characters (like emoji) that produce error with MySQL regular “UTF8” encoding
@@ -2447,6 +2514,20 @@ class Sanitizer extends Wire {
return $results;
}
+	/**
+	 * Get the WireTextTools instance used by this sanitizer
+	 *
+	 * The instance is created the first time this method is called and the same
+	 * instance is returned on every later call.
+	 *
+	 * @return WireTextTools
+	 *
+	 */
+	public function getTextTools() {
+		// already created on an earlier call
+		if($this->textTools) return $this->textTools;
+		$this->textTools = new WireTextTools();
+		$this->wire($this->textTools);
+		return $this->textTools;
+	}
+
/**********************************************************************************************************************
* FILE VALIDATORS
*
diff --git a/wire/core/WireTextTools.php b/wire/core/WireTextTools.php
new file mode 100644
index 00000000..330c137e
--- /dev/null
+++ b/wire/core/WireTextTools.php
@@ -0,0 +1,568 @@
+` elements. (default='• ')
+ * - `replacements` (array): Associative array of strings to manually replace. (default=[' ' => ' '])
+ * @return string
+ *
+ */
+ public function markupToText($str, array $options = array()) {
+
+ $defaults = array(
+ 'keepTags' => array(),
+ 'splitBlocks' => "\n\n",
+ 'convertEntities' => true,
+ 'listItemPrefix' => '• ',
+ 'replacements' => array(
+ ' ' => ' '
+ ),
+ );
+
+ $options = array_merge($defaults, $options);
+
+ if(strpos($str, '>') !== false) {
+
+ // ensure tags are separated by whitespace
+ $str = str_replace('><', '> <', $str);
+
+ // normalize newlines
+ if(strpos($str, "\r") !== false) {
+ $str = str_replace(array("\r\n", "\r"), "\n", $str);
+ }
+
+ // normalize tabs to spaces
+ if(strpos($str, "\t") !== false) {
+ $str = str_replace("\t", " ", $str);
+ }
+
+ // ensure paragraphs and headers are followed by two newlines
+ if(stripos($str, '
') || stripos($str, ')!i', '$1' . $options['splitBlocks'], $str);
+ }
+
+ // ensure list items are on their own line and prefixed with a bullet
+ if(stripos($str, ']*>!i', "\n$prefix", $str);
+ }
+
+ // convert
tags to be just a single newline
+ if(stripos($str, '
', '
', '
'), "
\n", $str);
+ while(stripos($str, "\n
") !== false) $str = str_replace("\n
", "
", $str);
+ while(stripos($str, "
\n\n") !== false) $str = str_replace("
\n\n", "
\n", $str);
+ }
+ }
+
+ // normalize newlines and whitespace around newlines
+ while(strpos($str, " \n") !== false) $str = str_replace(" \n", "\n", $str);
+ while(strpos($str, "\n ") !== false) $str = str_replace("\n ", "\n", $str);
+ while(strpos($str, "\n\n\n") !== false) $str = str_replace("\n\n\n", "\n\n", $str);
+
+ // strip tags
+ if(count($options['keepTags'])) {
+ // some tags will be allowed to remain
+ $keepTags = '';
+ foreach($options['keepTags'] as $tag) {
+ $keepTags .= "<" . trim($tag, "<>") . ">";
+ }
+ $str = strip_tags($str, $keepTags);
+
+ } else {
+ // not allowing any tags
+ $str = strip_tags($str);
+ // if any possible tag characters remain, drop them now
+ $str = str_replace(array('<', '>'), ' ', $str);
+ }
+
+ // apply any other replacements
+ foreach($options['replacements'] as $find => $replace) {
+ $str = str_ireplace($find, $replace, $str);
+ }
+
+ // convert entities to plain text equivalents
+ if($options['convertEntities'] && strpos($str, '&') !== false) {
+ $str = $this->unentities($str);
+ }
+
+ return trim($str);
+ }
+
+ /**
+ * Remove (or close) unclosed HTML tags from given string
+ *
+ * Remove unclosed tags:
+ * ---------------------
+ * At present, if it finds an unclosed tag, it removes all tags of the same kind.
+ * This is in order to keep the function fast, by delegating what it can to strip_tags().
+ * This is sufficient for our internal use here, but may not be ideal for all situations.
+ *
+ * Fix/close unclosed tags:
+ * ------------------------
+ * When the remove option is false, it will attempt to close unclosed tags rather than
+ * remove them. It doesn't know exactly where they should be closed, so it appends the
+ * close tags to the end of the string.
+ *
+ * @param string $str
+ * @param bool $remove Remove unclosed tags? If false, it will attempt to close them instead. (default=true)
+ * @param array $options
+ * - `ignoreTags` (array): Tags that can be ignored because they close themselves. (default=per HTML spec)
+ * @return string
+ *
+ */
+ public function fixUnclosedTags($str, $remove = true, $options = array()) {
+
+ $defaults = array(
+ 'ignoreTags' => array(
+ 'area','base','br','col','command','embed','hr','img','input',
+ 'keygen','link','menuitem','meta','param','source','track','wbr',
+ ),
+ );
+
+ if(isset($options['ignoreTags'])) {
+ // merge user specified ignoreTags with our defaults so that both are used
+ $options['ignoreTags'] = array_merge($defaults['ignoreTags'], $options['ignoreTags']);
+ }
+
+ $options = array_merge($defaults, $options);
+ $tags = array();
+ $unclosed = array();
+
+ $n1 = substr_count($str, '>');
+ $n2 = substr_count($str, '');
+
+ if($n1) $n1 = $n1 / 2;
+
+ // if the quantity of ">" is equal to double the quantity of "" then early exit
+ if($n1 === $n2) return $str;
+
+ // now check for string possibly ending with a partial tag, and remove if present
+ $n1 = strrpos($str, '<');
+ $n2 = strrpos($str, '>');
+ if($n1 > $n2) {
+ // string might end with a partial tag, i.e. "|\s*/>|\s[^>]+>)!i', $str, $matches)) return $str;
+
+ foreach($matches[1] as $key => $tag) {
+ if(strpos($matches[2][$key], '/>') !== false) continue; // ignore self closing tags
+ if(in_array(strtolower($tag), $options['ignoreTags'])) continue;
+ $tags[$tag] = $tag;
+ }
+
+ // count appearances of found tags
+ foreach($tags as $tag) {
+ // count number of open tags of this type
+ $openQty = substr_count($str, "<$tag>") + substr_count($str, "<$tag ");
+ // count number of closing tags of this type
+ $closeQty = substr_count($str, "$tag>");
+ // if quantities do not match, mark tag for deletion
+ if($openQty !== $closeQty) {
+ unset($tags[$tag]);
+ $unclosed[] = $tag;
+ }
+ }
+
+
+ if(count($unclosed)) {
+ if($remove) {
+ // strip all tags except those where open/close quantity matched
+ $keepTags = count($tags) ? '<' . implode('><', $tags) . '>' : '';
+ $str = strip_tags($str, $keepTags);
+ } else {
+ foreach($unclosed as $tag) {
+ $str .= "$tag>";
+ }
+ }
+ }
+
+ return $str;
+ }
+
+ /**
+ * Collapse string to plain text that all exists on a single long line without destroying words/punctuation.
+ *
+ * @param string $str String to collapse
+ * @param array $options
+ * - `stripTags` (bool): Strip markup tags? (default=true)
+ * - `keepTags` (array): Array of tag names to keep, if stripTags==true. (default=[])
+ * - `collapseLinesWith` (string): String to collapse newlines with. (default=' ')
+ * - `endBlocksWith` (string): Character or string to insert to identify paragraph/header separation (default='')
+ * - `convertEntities` (bool): Convert entity-encoded characters to text? (default=true)
+ * @return mixed|string
+ *
+ */
+	public function collapse($str, array $options = array()) {
+
+		$defaults = array(
+			'stripTags' => true,
+			'keepTags' => array(),
+			'collapseLinesWith' => ' ',
+			'endBlocksWith' => '',
+			'convertEntities' => true,
+		);
+
+		$options = array_merge($defaults, $options);
+
+		if($options['stripTags']) {
+			// convert markup to text first, nothing more to do if that leaves an empty string
+			$str = $this->markupToText($str, array(
+				'convertEntities' => $options['convertEntities'],
+				'keepTags' => $options['keepTags'],
+			));
+			if(!strlen($str)) return $str;
+		}
+
+		// string that we collapse lines with
+		$r = $options['collapseLinesWith'];
+
+		// two consecutive collapse strings (empty when collapseLinesWith is empty)
+		$rr = "$r$r";
+
+		// convert any tabs to space
+		if(strpos($str, "\t") !== false) {
+			$str = str_replace("\t", " ", $str);
+		}
+
+		// convert CRs to LFs
+		if(strpos($str, "\r") !== false) {
+			$str = str_replace(array("\r\n", "\r"), "\n", $str);
+		}
+
+		// collapse whitespace that appears before or after newlines
+		while(strpos($str, " \n") !== false) $str = str_replace(" \n", "\n", $str);
+		while(strpos($str, "\n ") !== false) $str = str_replace("\n ", "\n", $str);
+
+		// convert redundant LFs to no more than double LFs
+		while(strpos($str, "\n\n\n") !== false) {
+			$str = str_replace("\n\n\n", "\n\n", $str);
+		}
+
+		// add string to indicate blocks, when asked for
+		if(!empty($options['endBlocksWith'])) {
+			$str = str_replace("\n\n", "$options[endBlocksWith]\n\n", $str);
+		}
+
+		// replace all types of newlines
+		$str = str_replace(array("\r\n", "\r", "\n\n", "\n"), $r, $str);
+
+		// while there are consecutives of our collapse string, reduce them to one.
+		// skipped for an empty collapse string: strpos() with an empty needle returns 0 in PHP 8,
+		// and replacing an empty string changes nothing, so the loop would never end
+		if(strlen($rr)) {
+			while(strpos($str, $rr) !== false) {
+				$str = str_replace($rr, $r, $str);
+			}
+		}
+
+		if($r !== $defaults['collapseLinesWith']) {
+			// replacement of whitespace with something other than another single whitespace
+			// so collapse consecutive spaces to one space, since this would not be already done
+			// (needle is TWO spaces, replacement is ONE space)
+			while(strpos($str, "  ") !== false) {
+				$str = str_replace("  ", " ", $str);
+			}
+			// use space rather than replacement string when left side already ends with punctuation.
+			// str_replace() is a no-op when there is no match, so no strpos() pre-check is used here
+			// (a truthiness check on strpos() would also miss a match at position 0)
+			foreach($this->getPunctuationChars() as $c) {
+				if(!strlen($c)) continue;
+				$str = str_replace("$c$r", "$c ", $str);
+			}
+		}
+
+		return trim($str);
+	}
+
+ /**
+ * Truncate string to given maximum length without breaking words
+ *
+ * This method can truncate between words, sentences, punctuation or blocks (like paragraphs).
+ * See the `type` option for details on how it should truncate. By default it truncates between
+ * words. Description of types:
+ *
+ * - word: truncate to closest word.
+ * - punctuation: truncate to closest punctuation within sentence.
+ * - sentence: truncate to closest sentence.
+ * - block: truncate to closest block of text (like a paragraph or headline).
+ *
+ * Note that if your specified `type` is something other than “word”, and it cannot be matched
+ * within the maxLength, then it will attempt a different type. For instance, if you specify
+ * “sentence” as the type, and it cannot match a sentence, it will try to match to “punctuation”
+ * instead. If it cannot match that, then it will attempt “word”.
+ *
+	 * HTML will be stripped from the returned string. If you want to keep some tags, use the `keepTags` or `keepFormatTags`
+	 * options to specify which tags are allowed to remain. When the `keepFormatTags` option is true, all HTML inline
+	 * text formatting tags are retained.
+ *
+ * ~~~~~~~
+ * // Truncate string to closest word within 150 characters
+ * $s = $sanitizer->truncate($str, 150);
+ *
+ * // Truncate string to closest sentence within 300 characters
+ * $s = $sanitizer->truncate($str, 300, 'sentence');
+ *
+ * // Truncate with options
+ * $s = $sanitizer->truncate($str, [
+ * 'type' => 'punctuation',
+ * 'maxLength' => 300,
+ * 'visible' => true,
+ * 'more' => '…'
+ * ]);
+ * ~~~~~~~
+ *
+ * @param string $str String to truncate
+ * @param int|array $maxLength Maximum length of returned string, or specify $options array here.
+ * @param array|string $options Options array, or specify `type` option (string).
+ * - `type` (string): Preferred truncation type of word, punctuation, sentence, or block. (default='word')
+ * This is a “preferred type”, not an absolute one, because it will adjust to match what it can within your maxLength.
+ * - `maxLength` (int): Max characters for truncation, used only if $options array substituted for $maxLength argument.
+ * - `maximize` (bool): Include as much as possible within specified type and max-length? (default=true)
+	 *    If you specify false for the maximize option, it will truncate to first word, punctuation, sentence or block.
+ * - `visible` (bool): When true, invisible text (markup, entities, etc.) does not count towards string length. (default=false)
+ * - `trim` (string): Characters to trim from returned string. (default=',;/ ')
+ * - `noTrim` (string): Never trim these from end of returned string. (default=')]>}”»')
+ * - `more` (string): Append this to truncated strings that do not end with sentence punctuation. (default='…')
+ * - `keepTags` (array): HTML tags that should be kept in returned string. (default=[])
+ * - `keepFormatTags` (bool): Keep HTML text-formatting tags? Simpler alternative to keepTags option. (default=false)
+ * - `collapseLinesWith` (string): String to collapse lines with where the first is not punctuated. (default=' … ')
+ * - `convertEntities` (bool): Convert HTML entities to non-entity characters? (default=false)
+ * - `noEndSentence` (string): Strings that sentence may not end with, space-separated values (default='Mr. Mrs. …')
+ * @return string
+ *
+ */
+	public function truncate($str, $maxLength, $options = array()) {
+
+		$defaults = array(
+			'type' => 'word', // word, punctuation, sentence, or block
+			'maximize' => true, // include as much as possible within the type and maxLength (false=include as little as possible)
+			'visible' => false, // when true, invisible text (markup, entities, etc.) does not count towards string length. (default=false)
+			'trim' => $this->_(',;/') . ' ', // Trim these characters from the end of the returned string
+			'noTrim' => $this->_(')]>}”»'), // Never trim these characters from end of returned string
+			'more' => '…', // Append to truncated strings that do not end with sentence punctuation
+			'stripTags' => true, // strip HTML tags? (currently required, see keepTags to keep some)
+			'keepTags' => array(), // if strip HTML tags is true, optional array of tag names you want to keep
+			'keepFormatTags' => false, // alternative to keepTags: keep just inline text format tags like strong, em, etc.
+			'collapseWhitespace' => true, // collapsed whitespace (currently required)
+			'collapseLinesWith' => ' ' . $this->_('…') . ' ', // String placed between joined lines (like from paragraphs)
+			'convertEntities' => false, // convert entity encoded characters to non-entity equivalents? (default=false)
+			'noEndSentence' => $this->_('Mr. Mrs. Ms. Dr. Hon. PhD. i.e. e.g.'), // When in sentence type, words that do not end the sentence (space-separated)
+		);
+
+		if(!strlen($str)) return '';
+
+		if(is_string($options) && ctype_alpha($options)) {
+			// type was specified in place of the options array
+			$defaults['type'] = $options;
+			$options = array();
+		} else if(!is_array($options)) {
+			// any other non-array value cannot be merged with the defaults below, so it is ignored
+			$options = array();
+		}
+
+		if(is_array($maxLength)) {
+			// options array was specified in place of maxLength
+			$options = $maxLength;
+			if(!isset($options['maxLength'])) $options['maxLength'] = 0;
+			$maxLength = $options['maxLength'];
+		} else if(is_string($maxLength) && ctype_alpha($maxLength)) {
+			// type was specified in place of maxLength
+			$options['type'] = $maxLength;
+			$maxLength = isset($options['maxLength']) ? $options['maxLength'] : mb_strlen($str);
+		}
+
+		if(!$maxLength) $maxLength = 255;
+		$options = array_merge($defaults, $options);
+		$type = $options['type'];
+		$str = trim($str);
+		$blockEndChar = '¶';
+		$tests = array(); // candidate character positions to truncate at
+		$punctuationChars = $this->getPunctuationChars();
+		$endSentenceChars = $this->getPunctuationChars(true);
+		$noEndSentenceWords = explode(' ', $options['noEndSentence']);
+
+		if($options['keepFormatTags']) {
+			$options['keepTags'] = array_merge($options['keepTags'], array(
+				'abbr','acronym','b','big','cite','code','em','i','kbd', 'q','samp','small','span','strong','sub','sup','time','var',
+			));
+		}
+
+		if($type === 'block') {
+			// the block-end marker must not already be present in the text, since it is used to locate blocks
+			if(mb_strpos($str, $blockEndChar) !== false) $str = str_replace($blockEndChar, ' ', $str);
+			$options['endBlocksWith'] = $blockEndChar;
+		}
+
+		// collapse whitespace and strip tags
+		$str = $this->collapse($str, $options);
+
+		if(trim($options['collapseLinesWith']) && mb_strpos($str, $options['collapseLinesWith'])) {
+			// if lines are collapsed with something other than whitespace, avoid using that string
+			// when the line already ends with sentence punctuation
+			foreach($endSentenceChars as $c) {
+				$str = str_replace("$c$options[collapseLinesWith]", "$c ", $str);
+			}
+		}
+
+		// if anything above reduced the length of the string enough, return it now
+		if(mb_strlen($str) <= $maxLength) return $str;
+
+		// get string at maximum possible length
+		if($options['visible']) {
+			// adjust for only visible length
+			$_str = $str;
+			$str = mb_substr($str, 0, $maxLength);
+			$len = $this->getVisibleLength($str);
+			if($len < $maxLength) {
+				// extend by the quantity of invisible characters found
+				$maxLength += ($maxLength - $len);
+				$str = mb_substr($_str, 0, $maxLength);
+			}
+			unset($_str);
+		} else {
+			$str = mb_substr($str, 0, $maxLength);
+		}
+
+		// match to closest blocks, like paragraph(s)
+		if($type === 'block') {
+			$pos = $options['maximize'] ? mb_strrpos($str, $blockEndChar) : mb_strpos($str, $blockEndChar);
+			if($pos === false) {
+				$type = 'word';
+			} else {
+				$tests[] = $pos;
+				$options['trim'] .= $blockEndChar;
+			}
+		}
+
+		// find sentences closest to end
+		if($type === 'sentence') {
+			foreach($endSentenceChars as $find) {
+				$pos = $options['maximize'] ? mb_strrpos($str, "$find ") : mb_strpos($str, "$find ");
+				if($pos) $tests[] = $pos;
+			}
+			if(!count($tests)) $type = 'punctuation';
+		}
+
+		// find punctuation closest to end of string
+		if($type === 'punctuation') {
+			foreach($punctuationChars as $find) {
+				$pos = $options['maximize'] ? mb_strrpos($str, $find) : mb_strpos($str, $find);
+				if($pos) $tests[] = $pos;
+			}
+			if(!count($tests)) $type = 'word';
+		}
+
+		// find whitespace and last word closest to end of string
+		if($type === 'word' || !count($tests)) {
+			$pos = $options['maximize'] ? mb_strrpos($str, ' ') : mb_strpos($str, ' ');
+			if($pos) $tests[] = $pos;
+		}
+
+		// if we didn't find any place to truncate, just return exact truncated string
+		if(!count($tests)) {
+			return $this->trimChars($str, $options['trim'], true) . $options['more'];
+		}
+
+		// we found somewhere to truncate, so truncate at the longest one possible
+		// (positions are taken from the end of the array with array_pop below)
+		if($options['maximize']) {
+			sort($tests);
+		} else {
+			rsort($tests);
+		}
+
+		// process our tests
+		do {
+			$pos = array_pop($tests);
+			$result = trim(mb_substr($str, 0, $pos + 1));
+			$lastChar = mb_substr($result, -1);
+			$result = $this->trimChars($result, $options['trim']);
+
+			if($type === 'sentence') {
+				// character (not byte) position, since it is used with mb_substr() here and as a
+				// truncate position on the next iteration when the last word is disallowed
+				$pos = mb_strrpos($result, ' ');
+				if(!$pos) break;
+				// if sentence type, make sure it doesn't end with a disallowed word
+				$lastWord = mb_substr($result, $pos + 1);
+				while(!ctype_alnum(mb_substr($lastWord, 0, 1)) && strlen($lastWord)) {
+					$lastWord = mb_substr($lastWord, 1);
+				}
+				foreach($noEndSentenceWords as $word) {
+					if($word !== $lastWord) continue;
+					// try again as word type, truncating before the disallowed word
+					$tests[] = $pos;
+					$type = 'word';
+					$result = '';
+					break;
+				}
+			} else if($type === 'block') {
+				// good to go with result as is
+			} else {
+				if(in_array($lastChar, $endSentenceChars)) {
+					// great, end with sentence ending punctuation
+				} else if(in_array($lastChar, $punctuationChars)) {
+					// trim trailing punctuation, except for the noTrim and sentence ending characters
+					$trims = ' ';
+					foreach($punctuationChars as $c) {
+						if(mb_strpos($options['noTrim'], $c) !== false) continue;
+						if(in_array($c, $endSentenceChars)) continue;
+						$trims .= $c;
+					}
+					$result = $this->trimChars($result, $trims) . $options['more'];
+				} else {
+					$result .= $options['more'];
+				}
+			}
+
+		} while(!strlen($result) && count($tests));
+
+		// make sure we didn't break any HTML tags as a result of truncation
+		if(strlen($result) && count($options['keepTags']) && strpos($result, '<') !== false) {
+			$result = $this->fixUnclosedTags($result);
+		}
+
+		return $result;
+	}
+
+	/**
+	 * Multibyte-safe trim of given characters from the end (and optionally the start) of a string
+	 *
+	 * PHP’s trim() and rtrim() treat the character list as individual bytes, so a list containing
+	 * multibyte characters can remove part of a different multibyte character and leave invalid
+	 * UTF-8 behind. This method matches whole UTF-8 characters instead. The “..” range syntax of
+	 * trim() is not supported, each character in the list is matched literally.
+	 *
+	 * @param string $str String to trim
+	 * @param string $chars Characters to trim
+	 * @param bool $both Also trim from the start of the string? (default=false)
+	 * @return string
+	 *
+	 */
+	protected function trimChars($str, $chars, $both = false) {
+		if(!strlen($str) || !strlen($chars)) return $str;
+		$class = '[' . preg_quote($chars, '/') . ']+';
+		$regex = $both ? "/\\A$class|$class\\z/u" : "/$class\\z/u";
+		$result = preg_replace($regex, '', $str);
+		if($result === null) {
+			// preg_replace failed (i.e. invalid UTF-8 in string or character list), fall back to byte-wise trim
+			$result = $both ? trim($str, $chars) : rtrim($str, $chars);
+		}
+		return $result;
+	}
+
+	/**
+	 * Return visible length of string, which is length not counting markup or entities
+	 *
+	 * Tags are removed with strip_tags() and entities are decoded with html_entity_decode()
+	 * before the remaining characters are counted with mb_strlen().
+	 *
+	 * @param string $str
+	 * @return int
+	 *
+	 */
+	public function getVisibleLength($str) {
+		// strpos() results are compared to false, since a match at position 0 is also a match
+		if(strpos($str, '>') !== false) {
+			$str = strip_tags($str);
+		}
+		if(strpos($str, '&') !== false && strpos($str, ';') !== false) {
+			$str = html_entity_decode($str, ENT_QUOTES, 'UTF-8');
+		}
+		return mb_strlen($str);
+	}
+
+	/**
+	 * Get array of punctuation characters
+	 *
+	 * The characters come from space-separated strings that are passed through `$this->_()`.
+	 * Some items may be more than one character long (like `--`).
+	 *
+	 * @param bool $sentence Get only sentence-ending punctuation
+	 * @return array
+	 *
+	 */
+	public function getPunctuationChars($sentence = false) {
+		if($sentence) {
+			$s = $this->_('. ? !'); // Sentence ending punctuation characters (must be space-separated)
+		} else {
+			$s = $this->_(', : . ? ! “ ” „ " – -- ( ) [ ] { } « »'); // All punctuation characters (must be space-separated)
+		}
+		// drop empty items that result from leading, trailing or repeated spaces in the string,
+		// since callers use each returned item as a search string
+		return array_values(array_filter(explode(' ', $s), 'strlen'));
+	}
+
+}