/**
 * Given some text (which may contain HTML) and an ideal length,
 * this function truncates the text neatly on a word boundary if possible
 * and closes any HTML tags that the truncation left open (MDL-12399).
 *
 * Lengths are measured in bytes of plain text: HTML tags are not counted
 * and each HTML entity counts as a single character. The '...' ending is
 * included in the ideal length.
 *
 * @param string $text - text to be shortened
 * @param int $ideal - ideal string length
 * @param boolean $exact if false, $text will not be cut mid-word
 * @return string $truncate - shortened string
 */
function shorten_text($text, $ideal=30, $exact = false) {
    $ending = '...';
    $entity_regex = '/&[0-9a-z]{2,8};|&#[0-9]{1,7};|&#x[0-9a-f]{1,6};/i';

    // If the plain text is not longer than the ideal length, return the whole text.
    // The "s" modifier lets tags that span several lines be stripped as well.
    if (strlen(preg_replace('/<.*?>/s', '', $text)) <= $ideal) {
        return $text;
    }

    // Split the text into (optional tag, following plain text) pairs.
    preg_match_all('/(<.+?>)?([^<>]*)/s', $text, $lines, PREG_SET_ORDER);

    $total_length = strlen($ending);
    $truncate = '';

    foreach ($lines as $line_matchings) {
        // Tags are copied to the output without being counted.
        if (!empty($line_matchings[1])) {
            $truncate .= $line_matchings[1];
        }

        $content = $line_matchings[2];

        // Length of the plain text part, counting every entity as one character.
        $content_length = strlen(preg_replace($entity_regex, ' ', $content));

        if ($total_length + $content_length > $ideal) {
            // Number of visible characters still allowed. Never negative, because
            // a negative length would make substr() keep almost the whole string.
            $left = max(0, $ideal - $total_length);
            $entities_length = 0;

            // Entities starting inside the allowed range are kept whole: each one
            // uses up one visible character but contributes its full byte length.
            if (preg_match_all($entity_regex, $content, $entities, PREG_OFFSET_CAPTURE)) {
                foreach ($entities[0] as $entity) {
                    if ($entity[1] + 1 - $entities_length <= $left) {
                        $left--;
                        $entities_length += strlen($entity[0]);
                    } else {
                        // No more characters left.
                        break;
                    }
                }
            }

            // Do not cut in the middle of a UTF-8 sequence: step back while the
            // byte at the cut position is a continuation byte (10xxxxxx).
            $cut = $left + $entities_length;
            $content_bytes = strlen($content);
            while ($cut > 0 && $cut < $content_bytes && (ord($content[$cut]) & 0xC0) == 0x80) {
                $cut--;
            }

            $truncate .= substr($content, 0, $cut);
            // Maximum length is reached, so leave the loop.
            break;
        }

        $truncate .= $content;
        $total_length += $content_length;

        // If the maximum length is reached, leave the loop.
        if ($total_length >= $ideal) {
            break;
        }
    }

    // If words should not be cut in the middle...
    if (!$exact) {
        // ...search backwards for the last space or full stop in the plain text.
        // Characters inside tags are skipped so that markup is never cut apart.
        $in_tag = false;
        for ($k = strlen($truncate) - 1; $k > 0; $k--) {
            $char = $truncate[$k];
            if ($char == '>') {
                $in_tag = true;
                continue;
            }
            if ($char == '<') {
                $in_tag = false;
                continue;
            }
            if ($in_tag) {
                continue;
            }
            if ($char == '.' or $char == ' ') {
                $breakpos = $k + 1;
                break;
            } else if (ord($char) >= 0xE0) {  // Chinese/Japanese/Korean text
                $breakpos = $k;               // can be truncated at any UTF-8
                break;                        // character boundary.
            }
        }

        if (isset($breakpos)) {
            // ...and cut the text at this position.
            $truncate = substr($truncate, 0, $breakpos);
        }
    }

    // Work out which tags are still open in the text that is actually kept.
    // This is done after the word-boundary cut so that a tag removed by that
    // cut does not get a stray closing tag. The most recently opened tag is
    // kept at the front of the list.
    $open_tags = array();
    if (preg_match_all('/<.+?>/s', $truncate, $tags)) {
        foreach ($tags[0] as $tag) {
            if (preg_match('/^<(\s*.+?\/\s*|\s*(img|br|input|hr|area|base|basefont|col|frame|isindex|link|meta|param)(\s.+?)?)>$/is', $tag)) {
                // Empty element, with or without an XHTML closing slash: nothing to track.
                continue;
            }
            if (preg_match('/^<\s*\/([^\s]+?)\s*>$/s', $tag, $tag_matchings)) {
                // Closing tag: drop the most recently opened tag of the same name.
                // Names are compared in lower case, the way they are stored.
                $pos = array_search(strtolower($tag_matchings[1]), $open_tags, true);
                if ($pos !== false) {
                    array_splice($open_tags, $pos, 1);
                }
            } else if (preg_match('/^<\s*([^\s>!]+).*?>$/s', $tag, $tag_matchings)) {
                // Opening tag (comments and doctypes are excluded by the "!").
                array_unshift($open_tags, strtolower($tag_matchings[1]));
            }
        }
    }

    // Add the defined ending to the text.
    $truncate .= $ending;

    // Close all unclosed HTML tags, innermost first.
    foreach ($open_tags as $tag) {
        $truncate .= '</' . $tag . '>';
    }

    return $truncate;
}