mirror of
https://github.com/moodle/moodle.git
synced 2025-01-19 06:18:28 +01:00
MDL-12399, shorten_text() truncates all closing tags
This commit is contained in:
parent
97f0e78381
commit
0c07a1c458
@ -6345,62 +6345,114 @@ function random_string ($length=15) {
|
||||
/*
|
||||
* Given some text (which may contain HTML) and an ideal length,
|
||||
* this function truncates the text neatly on a word boundary if possible
|
||||
* @param string $text - text to be shortened
|
||||
* @param int $ideal - ideal string length
|
||||
* @param boolean $exact if false, $text will not be cut mid-word
|
||||
* @return string $truncate - shortened string
|
||||
*/
|
||||
function shorten_text($text, $ideal=30) {
|
||||
|
||||
global $CFG;
|
||||
function shorten_text($text, $ideal=30, $exact = false) {
|
||||
|
||||
global $CFG;
|
||||
$ending = '...';
|
||||
|
||||
$i = 0;
|
||||
$tag = false;
|
||||
$length = strlen($text);
|
||||
$count = 0;
|
||||
$stopzone = false;
|
||||
$truncate = 0;
|
||||
// if the plain text is shorter than the maximum length, return the whole text
|
||||
if (strlen(preg_replace('/<.*?>/', '', $text)) <= $ideal) {
|
||||
return $text;
|
||||
}
|
||||
|
||||
// splits all html-tags to scannable lines
|
||||
preg_match_all('/(<.+?>)?([^<>]*)/s', $text, $lines, PREG_SET_ORDER);
|
||||
|
||||
if ($length <= $ideal) {
|
||||
return $text;
|
||||
}
|
||||
$total_length = strlen($ending);
|
||||
$open_tags = array();
|
||||
$truncate = '';
|
||||
|
||||
for ($i=0; $i<$length; $i++) {
|
||||
$char = $text[$i];
|
||||
foreach ($lines as $line_matchings) {
|
||||
// if there is any html-tag in this line, handle it and add it (uncounted) to the output
|
||||
if (!empty($line_matchings[1])) {
|
||||
// if it's an "empty element" with or without an XHTML-conformant closing slash (e.g. <br/>)
|
||||
if (preg_match('/^<(\s*.+?\/\s*|\s*(img|br|input|hr|area|base|basefont|col|frame|isindex|link|meta|param)(\s.+?)?)>$/is', $line_matchings[1])) {
|
||||
// do nothing
|
||||
// if tag is a closing tag (e.g. </b>)
|
||||
} else if (preg_match('/^<\s*\/([^\s]+?)\s*>$/s', $line_matchings[1], $tag_matchings)) {
|
||||
// delete tag from $open_tags list
|
||||
$pos = array_search($tag_matchings[1], array_reverse($open_tags, true)); // can have multiple exact same open tags, close the last one
|
||||
if ($pos !== false) {
|
||||
unset($open_tags[$pos]);
|
||||
}
|
||||
// if tag is an opening tag (e.g. <b>)
|
||||
} else if (preg_match('/^<\s*([^\s>!]+).*?>$/s', $line_matchings[1], $tag_matchings)) {
|
||||
// add tag to the beginning of $open_tags list
|
||||
array_unshift($open_tags, strtolower($tag_matchings[1]));
|
||||
}
|
||||
// add html-tag to $truncate'd text
|
||||
$truncate .= $line_matchings[1];
|
||||
}
|
||||
|
||||
switch ($char) {
|
||||
case "<":
|
||||
$tag = true;
|
||||
break;
|
||||
case ">":
|
||||
$tag = false;
|
||||
break;
|
||||
default:
|
||||
if (!$tag) {
|
||||
if ($stopzone) {
|
||||
if ($char == '.' or $char == ' ') {
|
||||
$truncate = $i+1;
|
||||
break 2;
|
||||
} else if (ord($char) >= 0xE0) { // Chinese/Japanese/Korean text
|
||||
$truncate = $i; // can be truncated at any UTF-8
|
||||
break 2; // character boundary.
|
||||
}
|
||||
}
|
||||
$count++;
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (!$stopzone) {
|
||||
if ($count > $ideal) {
|
||||
$stopzone = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
// calculate the length of the plain text part of the line; handle entities as one character
|
||||
$content_length = strlen(preg_replace('/&[0-9a-z]{2,8};|&#[0-9]{1,7};|&#x[0-9a-f]{1,6};/i', ' ', $line_matchings[2]));
|
||||
if ($total_length+$content_length > $ideal) {
|
||||
// the number of characters which are left
|
||||
$left = $ideal - $total_length;
|
||||
$entities_length = 0;
|
||||
// search for html entities
|
||||
if (preg_match_all('/&[0-9a-z]{2,8};|&#[0-9]{1,7};|&#x[0-9a-f]{1,6};/i', $line_matchings[2], $entities, PREG_OFFSET_CAPTURE)) {
|
||||
// calculate the real length of all entities in the legal range
|
||||
foreach ($entities[0] as $entity) {
|
||||
if ($entity[1]+1-$entities_length <= $left) {
|
||||
$left--;
|
||||
$entities_length += strlen($entity[0]);
|
||||
} else {
|
||||
// no more characters left
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
$truncate .= substr($line_matchings[2], 0, $left+$entities_length);
|
||||
// maximum length is reached, so break out of the loop
|
||||
break;
|
||||
} else {
|
||||
$truncate .= $line_matchings[2];
|
||||
$total_length += $content_length;
|
||||
}
|
||||
|
||||
// if the maximum length is reached, break out of the loop
|
||||
if($total_length >= $ideal) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!$truncate) {
|
||||
$truncate = $i;
|
||||
}
|
||||
// if the words shouldn't be cut in the middle...
|
||||
if (!$exact) {
|
||||
// ...search for the last occurrence of a space...
|
||||
for ($k=strlen($truncate);$k>0;$k--) {
|
||||
if (!empty($truncate[$k]) && ($char = $truncate[$k])) {
|
||||
if ($char == '.' or $char == ' ') {
|
||||
$breakpos = $k+1;
|
||||
break;
|
||||
} else if (ord($char) >= 0xE0) { // Chinese/Japanese/Korean text
|
||||
$breakpos = $k; // can be truncated at any UTF-8
|
||||
break; // character boundary.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (isset($breakpos)) {
|
||||
// ...and cut the text in this position
|
||||
$truncate = substr($truncate, 0, $breakpos);
|
||||
}
|
||||
}
|
||||
|
||||
$ellipse = ($truncate < $length) ? '...' : '';
|
||||
// add the defined ending to the text
|
||||
$truncate .= $ending;
|
||||
|
||||
return substr($text, 0, $truncate).$ellipse;
|
||||
// close all unclosed html-tags
|
||||
foreach ($open_tags as $tag) {
|
||||
$truncate .= '</' . $tag . '>';
|
||||
}
|
||||
|
||||
return $truncate;
|
||||
}
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user