mirror of
https://github.com/processwire/processwire.git
synced 2025-08-15 19:24:28 +02:00
Some minor updates to WireTextTools::truncate() method to improve sentence type matches
This commit is contained in:
@@ -393,7 +393,6 @@ class WireTextTools extends Wire {
|
|||||||
$tests = array();
|
$tests = array();
|
||||||
$punctuationChars = $this->getPunctuationChars();
|
$punctuationChars = $this->getPunctuationChars();
|
||||||
$endSentenceChars = $this->getPunctuationChars(true);
|
$endSentenceChars = $this->getPunctuationChars(true);
|
||||||
$noEndSentenceWords = explode(' ', $options['noEndSentence']);
|
|
||||||
|
|
||||||
if($options['keepFormatTags']) {
|
if($options['keepFormatTags']) {
|
||||||
$options['keepTags'] = array_merge($options['keepTags'], array(
|
$options['keepTags'] = array_merge($options['keepTags'], array(
|
||||||
@@ -439,7 +438,7 @@ class WireTextTools extends Wire {
|
|||||||
if($type === 'block') {
|
if($type === 'block') {
|
||||||
$pos = $options['maximize'] ? mb_strrpos($str, $blockEndChar) : mb_strpos($str, $blockEndChar);
|
$pos = $options['maximize'] ? mb_strrpos($str, $blockEndChar) : mb_strpos($str, $blockEndChar);
|
||||||
if($pos === false) {
|
if($pos === false) {
|
||||||
$type = 'word';
|
$type = 'sentence';
|
||||||
} else {
|
} else {
|
||||||
$tests[] = $pos;
|
$tests[] = $pos;
|
||||||
$options['trim'] .= $blockEndChar;
|
$options['trim'] .= $blockEndChar;
|
||||||
@@ -448,10 +447,7 @@ class WireTextTools extends Wire {
|
|||||||
|
|
||||||
// find sentences closest to end
|
// find sentences closest to end
|
||||||
if($type === 'sentence') {
|
if($type === 'sentence') {
|
||||||
foreach($endSentenceChars as $find) {
|
$this->truncateSentenceTests($str, $tests, $endSentenceChars, $options);
|
||||||
$pos = $options['maximize'] ? mb_strrpos($str, "$find ") : mb_strpos($str, "$find ");
|
|
||||||
if($pos) $tests[] = $pos;
|
|
||||||
}
|
|
||||||
if(!count($tests)) $type = 'punctuation';
|
if(!count($tests)) $type = 'punctuation';
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -489,26 +485,10 @@ class WireTextTools extends Wire {
|
|||||||
$lastChar = mb_substr($result, -1);
|
$lastChar = mb_substr($result, -1);
|
||||||
$result = rtrim($result, $options['trim']);
|
$result = rtrim($result, $options['trim']);
|
||||||
|
|
||||||
if($type === 'sentence') {
|
if($type === 'sentence' || $type === 'block') {
|
||||||
$pos = strrpos($result, ' ');
|
|
||||||
if(!$pos) break;
|
|
||||||
// if sentence type, make sure it doesn't end with a disallowed word
|
|
||||||
$lastWord = mb_substr($result, $pos + 1);
|
|
||||||
while(!ctype_alnum(mb_substr($lastWord, 0, 1)) && strlen($lastWord)) {
|
|
||||||
$lastWord = mb_substr($lastWord, 1);
|
|
||||||
}
|
|
||||||
foreach($noEndSentenceWords as $word) {
|
|
||||||
if($word !== $lastWord) continue;
|
|
||||||
$tests[] = $pos;
|
|
||||||
$type = 'word';
|
|
||||||
$result = '';
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
} else if($type === 'block') {
|
|
||||||
// good to go with result as is
|
// good to go with result as is
|
||||||
} else {
|
} else if(in_array($lastChar, $endSentenceChars)) {
|
||||||
if(in_array($lastChar, $endSentenceChars)) {
|
// good, end with sentence ending punctuation
|
||||||
// great, end with sentence ending punctuation
|
|
||||||
} else if(in_array($lastChar, $punctuationChars)) {
|
} else if(in_array($lastChar, $punctuationChars)) {
|
||||||
$trims = ' ';
|
$trims = ' ';
|
||||||
foreach($punctuationChars as $c) {
|
foreach($punctuationChars as $c) {
|
||||||
@@ -520,7 +500,6 @@ class WireTextTools extends Wire {
|
|||||||
} else {
|
} else {
|
||||||
$result .= $options['more'];
|
$result .= $options['more'];
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
} while(!strlen($result) && count($tests));
|
} while(!strlen($result) && count($tests));
|
||||||
|
|
||||||
@@ -532,6 +511,61 @@ class WireTextTools extends Wire {
|
|||||||
return $result;
|
return $result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Helper to truncate() method, generate tests/positions for where sentences end
 *
 * Looks for each sentence-ending character followed by a space and appends the
 * position of every acceptable match to $tests. A period is not accepted when the
 * text leading up to it ends with one of the space-separated words in
 * $options['noEndSentence'], with a number (i.e. "10."), or with a single word
 * character (i.e. an initial like "J."). When a period is rejected, the search is
 * retried (at most 3 passes total, periods only) as follows:
 *
 * - maximize mode: on a shorter string that excludes the rejected word, so that
 *   the previous period is found.
 * - non-maximize mode: on the same string, starting after the rejected period,
 *   so that the next period is found.
 *
 * Positions are always relative to the start of $str.
 *
 * @param string $str
 * @param array $tests Tests to append found positions to
 * @param array $endSentenceChars
 * @param array $options Options provided to truncate method, `maximize` and `noEndSentence` are used here
 *
 */
protected function truncateSentenceTests($str, array &$tests, array $endSentenceChars, array $options) {

	$chars = $endSentenceChars;
	$thisStr = $str;
	$nextStr = ''; // non-empty when another pass has been requested
	$nextOffset = 0;
	$offset = 0; // offset used for maximize==false mode only
	$n = 0;

	// regex matches specified words, plus digits or single letters followed by period
	$noEndRegex = '!\b(' . str_replace(' ', '|', preg_quote($options['noEndSentence'])) . '|\d+\.|\w\.)$!';

	do {

		if(strlen($nextStr)) {
			// a previous pass rejected a period: retry, and only for periods
			$offset = $nextOffset;
			$thisStr = $nextStr;
			$nextStr = '';
			$chars = array('.');
		}

		foreach($chars as $find) {

			$pos = $options['maximize'] ? mb_strrpos($thisStr, "$find ") : mb_strpos($thisStr, "$find ", $offset);

			// false (not found) or 0 (nothing would remain before it) are both unusable
			if(!$pos) continue;

			if($find === '.') {
				// text up to and including the period
				$testStr = mb_substr($thisStr, 0, $pos + 1);
				if(preg_match($noEndRegex, $testStr, $matches)) {
					// ends with a disallowed word, so this period does not end a sentence
					if($options['maximize']) {
						// next time try to match with a shorter string (rejected word removed)
						$nextStr = mb_substr($testStr, 0, mb_strlen($testStr) - mb_strlen($matches[1]) - 1);
					} else {
						// next time search the same string, beginning after the rejected period;
						// $nextStr must be populated or the retry pass would never run
						$nextStr = $thisStr;
						$nextOffset = mb_strlen($testStr);
					}
					continue;
				}
			}

			$tests[] = $pos;
		}

	} while(strlen($nextStr) && ++$n < 3);
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return visible length of string, which is length not counting markup or entities
|
* Return visible length of string, which is length not counting markup or entities
|
||||||
*
|
*
|
||||||
|
Reference in New Issue
Block a user