1
0
mirror of https://github.com/processwire/processwire.git synced 2025-08-11 09:14:58 +02:00

Update WireTextTools with 2 new methods: getWordAlternates() and findReplaceEscapeChars()

This commit is contained in:
Ryan Cramer
2020-07-03 15:40:36 -04:00
parent d53e2ef323
commit 10541997d4
2 changed files with 205 additions and 2 deletions

View File

@@ -5,11 +5,13 @@
*
* #pw-summary Specific text and markup tools for ProcessWire $sanitizer and elsewhere.
*
* ProcessWire 3.x, Copyright 2018 by Ryan Cramer
* ProcessWire 3.x, Copyright 2020 by Ryan Cramer
* https://processwire.com
*
* @since 3.0.101
*
*
* @method array wordAlternates($word, array $options = array()) Protected method for hooking purposes only #pw-hooker #pw-internal
* @method string wordStem($word) Protected method for hooking purposes only #pw-hooker #pw-internal
*
*/
@@ -716,7 +718,125 @@ class WireTextTools extends Wire {
}
return explode(' ', $s);
}
/**
* Get alternate words for given word
*
* This method does not do anything unless an implementation is provided by a module (or something else)
* hooking the protected `WireTextTools::wordAlternates($word, $options)` method. Implementation should
* populate $event->return with any or all of the following (as available):
*
* - Word plural(s)
* - Word singular(s)
* - Word Lemmas
* - Word Synonyms
* - Anything else applicable to current $user->language
*
* See the protected WireTextTools::wordAlternates() method for hook instructions and an example.
*
* @param string $word
* @param array $options
* - `operator` (string): Operator being used, if applicable (default='')
* - `minLength` (int): Minimum word length to return in alternates (default=2)
* - `lowercase` (bool): Convert words to lowercase, if not already (default=false)
* @return array
* @since 3.0.162
* @see WireTextTools::getWordStem()
*
*/
public function getWordAlternates($word, array $options = array()) {
if(!$this->hasHook('wordAlternates()')) return array();
$defaults = array(
'operator' => '',
'minLength' => 2,
'lowercase' => false,
);
$options = array_merge($defaults, $options);
$word = $this->trim($word);
$words = array();
$wordLow = $this->strtolower($word);
if($options['lowercase']) $word = $wordLow;
if(empty($word)) return array();
$alternates = $this->wordAlternates($word, $options);
if(!count($alternates)) return array();
// if original word appears in return value, remove it
$key = array_search($word, $alternates);
if($key !== false) unset($alternates[$key]);
// populate $words, removing any invalid or duplicate values
foreach($alternates as $w) {
if(!is_string($w)) continue;
$w = $this->trim($w);
$wLow = $this->strtolower($w);
if($wLow === $wordLow) continue; // dup of original word
if($options['lowercase']) $w = $wLow; // use lowercase
if($this->strlen($w) < $options['minLength']) continue; // too short
if(isset($words[$wLow])) continue; // already have it
$words[$wLow] = $w;
}
return array_values($words);
}
/**
* Hookable method to return alternate words for given word
*
* This hookable method is separate from the public getWordAlternates() method so that
* we can provide predictable and already-populated $options to whatever is hooking this, as
* as provide some additional QA with the return value from modules/hooks.
*
* It is fine if the return value contains duplicates, the original word, or too-short words,
* as the calling getWordAlternates() takes care of those before returning words to user.
* Basically, hooks can ignore the `$options` argument, unless they need to know the `operator`,
* which may or may not be provided by the caller.
*
* In hook implementation, avoid deleting whats already present in $event->return just in
* case multiple hooks are adding words.
*
* ~~~~~
* // Contrived example of how to implement
* $wire->addHookAfter('WireTextTools::wordAlternates', function(HookEvent $event) {
* $word = $event->arguments(0); // string: word requested alternates for
* $words = $event->return; // array: existing return value
*
* $cats = [ 'cat', 'cats', 'kitty', 'feline', 'felines' ];
* $dogs = [ 'dog', 'dogs', 'doggy', 'canine', 'canines' ];
*
* if(in_array($word, $cats)) {
* $words = array_merge($words, $cats);
* } else if(in_array($word, $dogs)) {
* $words = array_merge($words, $dogs);
* }
*
* $event->return = $words;
* });
*
* // Test it out
* $words = $sanitizer->getTextTools()->getWordAlternates('cat');
* echo implode(', ', $words); // outputs: cats, kitty, kitten, feline, felines
* ~~~~~
*
* #pw-hooker
*
* @param string $word
* @param array $options
* - `operator` (string): Operator being used, if applicable (default='')
* @return array
* @since 3.0.162
*
*/
protected function ___wordAlternates($word, array $options) {
if($word && $options) {} // ignore
$alternates = array();
return $alternates;
}
/**
* Find and return all {placeholder} tags found in given string
*
@@ -981,6 +1101,89 @@ class WireTextTools extends Wire {
return $out;
}
/**
* Find escaped characters in $str, replace them with a placeholder, and return the placeholders
*
* Usage
* ~~~~~
* // 1. Escape certain chars in a string that you want to survive some processing:
* $str = 'Hello \*world\* foo \"bar\" baz';
*
* // 2. Use this method to find escape chars and replace them temporarily:
* $a = $sanitizer->getTextTools()->findReplaceEscapeChars($str, [ '*', '"' ]);
*
* // 3. Process string with anything that you want NOT to see chars that were escaped:
* $str = some_function_that_processes_the_string($str);
*
* // 4. Do this to restore the escaped chars (restored without backslashes by default):
* $str = str_replace(array_keys($a), array_values($a), $str);
* ~~~~~
*
* @param string &$str String to find escape chars in, it will be modified directly (passed by reference)
* @param array $escapeChars Array of chars you want to escape i.e. [ '*', '[', ']', '(', ')', '`', '_', '\\', '"' ]
* @param array $options Options to modify behavior:
* - `escapePrefix` (string): Character used to escape another character (default is backslash).
* - `restoreEscape` (bool): Should returned array also include the escape prefix, so escapes are restored? (default=false)
* - `gluePrefix` (string): Prefix for placeholders we substitute for escaped characters (default='{ESC')
* - `glueSuffix` (string): Suffix for placeholders we substitute for escaped characters (default='}')
* - `unescapeUnknown` (bool): If we come across escaped char not in your $escapeChars list, unescape it? (default=false)
* - `removeUnknown` (bool): If we come across escaped char not in your $escapeChars list, remove the escape and char? (default=false)
* @return array Returns assoc array where keys are placeholders substituted in $str and values are escaped characters.
* @since 3.0.162
*
*/
public function findReplaceEscapeChars(&$str, array $escapeChars, array $options = array()) {
$defaults = array(
'escapePrefix' => '\\',
'restoreEscape' => false, // when restoring, also restore escape prefix?
'gluePrefix' => '{ESC',
'glueSuffix' => '}',
'unescapeUnknown' => false,
'removeUnknown' => false,
);
$options = array_merge($defaults, $options);
$escapePrefix = $options['escapePrefix'];
if(strpos($str, $escapePrefix) === false) return array();
$escapes = array();
$glueSuffix = $options['glueSuffix'];
$parts = explode($escapePrefix, $str);
$n = 0;
do {
$gluePrefix = $options['gluePrefix'] . $n;
} while($this->strpos($str, $gluePrefix) !== false && ++$n);
$str = array_shift($parts);
foreach($parts as $key => $part) {
$len = $this->strlen($part);
$char = $len > 0 ? $this->substr($part, 0, 1) : ''; // char being escaped
$part = $len > 1 ? $this->substr($part, 1) : ''; // everything after it
$charKey = array_search($char, $escapeChars); // find placeholder (glue)
if($charKey !== false) {
// replace escaped char with placeholder ($glue)
$glue = $gluePrefix . $charKey . $glueSuffix;
$escapes[$glue] = $options['keepEscapePrefix'] ? $escapePrefix . $char : $char;
$str .= $glue . $part;
} else if($options['unescapeUnknown']) {
// unescape unknown escape char
$str .= $char . $part;
} else if($options['removeUnknown']) {
// remove unknown escape char
$str .= $part;
} else {
// some other backslash thats allowed, restore back as it was
$str .= $escapePrefix . $char . $part;
}
}
return $escapes;
}
/***********************************************************************************************************
* MULTIBYTE PHP STRING FUNCTIONS THAT FALLBACK WHEN MBSTRING NOT AVAILABLE