mirror of
https://github.com/processwire/processwire.git
synced 2025-08-11 09:14:58 +02:00
Update WireTextTools with 2 new methods: getWordAlternates() and findReplaceEscapeChars()
This commit is contained in:
@@ -5,11 +5,13 @@
|
||||
*
|
||||
* #pw-summary Specific text and markup tools for ProcessWire $sanitizer and elsewhere.
|
||||
*
|
||||
* ProcessWire 3.x, Copyright 2018 by Ryan Cramer
|
||||
* ProcessWire 3.x, Copyright 2020 by Ryan Cramer
|
||||
* https://processwire.com
|
||||
*
|
||||
* @since 3.0.101
|
||||
*
|
||||
*
|
||||
* @method array wordAlternates($word, array $options = array()) Protected method for hooking purposes only #pw-hooker #pw-internal
|
||||
* @method string wordStem($word) Protected method for hooking purposes only #pw-hooker #pw-internal
|
||||
*
|
||||
*/
|
||||
|
||||
@@ -716,7 +718,125 @@ class WireTextTools extends Wire {
|
||||
}
|
||||
return explode(' ', $s);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get alternate words for given word
|
||||
*
|
||||
* This method does not do anything unless an implementation is provided by a module (or something else)
|
||||
* hooking the protected `WireTextTools::wordAlternates($word, $options)` method. Implementation should
|
||||
* populate $event->return with any or all of the following (as available):
|
||||
*
|
||||
* - Word plural(s)
|
||||
* - Word singular(s)
|
||||
* - Word Lemmas
|
||||
* - Word Synonyms
|
||||
* - Anything else applicable to current $user->language
|
||||
*
|
||||
* See the protected WireTextTools::wordAlternates() method for hook instructions and an example.
|
||||
*
|
||||
* @param string $word
|
||||
* @param array $options
|
||||
* - `operator` (string): Operator being used, if applicable (default='')
|
||||
* - `minLength` (int): Minimum word length to return in alternates (default=2)
|
||||
* - `lowercase` (bool): Convert words to lowercase, if not already (default=false)
|
||||
* @return array
|
||||
* @since 3.0.162
|
||||
* @see WireTextTools::getWordStem()
|
||||
*
|
||||
*/
|
||||
public function getWordAlternates($word, array $options = array()) {
|
||||
|
||||
if(!$this->hasHook('wordAlternates()')) return array();
|
||||
|
||||
$defaults = array(
|
||||
'operator' => '',
|
||||
'minLength' => 2,
|
||||
'lowercase' => false,
|
||||
);
|
||||
|
||||
$options = array_merge($defaults, $options);
|
||||
$word = $this->trim($word);
|
||||
$words = array();
|
||||
$wordLow = $this->strtolower($word);
|
||||
|
||||
if($options['lowercase']) $word = $wordLow;
|
||||
if(empty($word)) return array();
|
||||
|
||||
$alternates = $this->wordAlternates($word, $options);
|
||||
if(!count($alternates)) return array();
|
||||
|
||||
// if original word appears in return value, remove it
|
||||
$key = array_search($word, $alternates);
|
||||
if($key !== false) unset($alternates[$key]);
|
||||
|
||||
// populate $words, removing any invalid or duplicate values
|
||||
foreach($alternates as $w) {
|
||||
if(!is_string($w)) continue;
|
||||
$w = $this->trim($w);
|
||||
$wLow = $this->strtolower($w);
|
||||
if($wLow === $wordLow) continue; // dup of original word
|
||||
if($options['lowercase']) $w = $wLow; // use lowercase
|
||||
if($this->strlen($w) < $options['minLength']) continue; // too short
|
||||
if(isset($words[$wLow])) continue; // already have it
|
||||
$words[$wLow] = $w;
|
||||
}
|
||||
|
||||
return array_values($words);
|
||||
}
|
||||
|
||||
/**
|
||||
* Hookable method to return alternate words for given word
|
||||
*
|
||||
* This hookable method is separate from the public getWordAlternates() method so that
|
||||
* we can provide predictable and already-populated $options to whatever is hooking this, as
|
||||
* as provide some additional QA with the return value from modules/hooks.
|
||||
*
|
||||
* It is fine if the return value contains duplicates, the original word, or too-short words,
|
||||
* as the calling getWordAlternates() takes care of those before returning words to user.
|
||||
* Basically, hooks can ignore the `$options` argument, unless they need to know the `operator`,
|
||||
* which may or may not be provided by the caller.
|
||||
*
|
||||
* In hook implementation, avoid deleting what’s already present in $event->return just in
|
||||
* case multiple hooks are adding words.
|
||||
*
|
||||
* ~~~~~
|
||||
* // Contrived example of how to implement
|
||||
* $wire->addHookAfter('WireTextTools::wordAlternates', function(HookEvent $event) {
|
||||
* $word = $event->arguments(0); // string: word requested alternates for
|
||||
* $words = $event->return; // array: existing return value
|
||||
*
|
||||
* $cats = [ 'cat', 'cats', 'kitty', 'feline', 'felines' ];
|
||||
* $dogs = [ 'dog', 'dogs', 'doggy', 'canine', 'canines' ];
|
||||
*
|
||||
* if(in_array($word, $cats)) {
|
||||
* $words = array_merge($words, $cats);
|
||||
* } else if(in_array($word, $dogs)) {
|
||||
* $words = array_merge($words, $dogs);
|
||||
* }
|
||||
*
|
||||
* $event->return = $words;
|
||||
* });
|
||||
*
|
||||
* // Test it out
|
||||
* $words = $sanitizer->getTextTools()->getWordAlternates('cat');
|
||||
* echo implode(', ', $words); // outputs: cats, kitty, kitten, feline, felines
|
||||
* ~~~~~
|
||||
*
|
||||
* #pw-hooker
|
||||
*
|
||||
* @param string $word
|
||||
* @param array $options
|
||||
* - `operator` (string): Operator being used, if applicable (default='')
|
||||
* @return array
|
||||
* @since 3.0.162
|
||||
*
|
||||
*/
|
||||
protected function ___wordAlternates($word, array $options) {
|
||||
if($word && $options) {} // ignore
|
||||
$alternates = array();
|
||||
return $alternates;
|
||||
}
|
||||
|
||||
/**
|
||||
* Find and return all {placeholder} tags found in given string
|
||||
*
|
||||
@@ -981,6 +1101,89 @@ class WireTextTools extends Wire {
|
||||
|
||||
return $out;
|
||||
}
|
||||
|
||||
/**
|
||||
* Find escaped characters in $str, replace them with a placeholder, and return the placeholders
|
||||
*
|
||||
* Usage
|
||||
* ~~~~~
|
||||
* // 1. Escape certain chars in a string that you want to survive some processing:
|
||||
* $str = 'Hello \*world\* foo \"bar\" baz';
|
||||
*
|
||||
* // 2. Use this method to find escape chars and replace them temporarily:
|
||||
* $a = $sanitizer->getTextTools()->findReplaceEscapeChars($str, [ '*', '"' ]);
|
||||
*
|
||||
* // 3. Process string with anything that you want NOT to see chars that were escaped:
|
||||
* $str = some_function_that_processes_the_string($str);
|
||||
*
|
||||
* // 4. Do this to restore the escaped chars (restored without backslashes by default):
|
||||
* $str = str_replace(array_keys($a), array_values($a), $str);
|
||||
* ~~~~~
|
||||
*
|
||||
* @param string &$str String to find escape chars in, it will be modified directly (passed by reference)
|
||||
* @param array $escapeChars Array of chars you want to escape i.e. [ '*', '[', ']', '(', ')', '`', '_', '\\', '"' ]
|
||||
* @param array $options Options to modify behavior:
|
||||
* - `escapePrefix` (string): Character used to escape another character (default is backslash).
|
||||
* - `restoreEscape` (bool): Should returned array also include the escape prefix, so escapes are restored? (default=false)
|
||||
* - `gluePrefix` (string): Prefix for placeholders we substitute for escaped characters (default='{ESC')
|
||||
* - `glueSuffix` (string): Suffix for placeholders we substitute for escaped characters (default='}')
|
||||
* - `unescapeUnknown` (bool): If we come across escaped char not in your $escapeChars list, unescape it? (default=false)
|
||||
* - `removeUnknown` (bool): If we come across escaped char not in your $escapeChars list, remove the escape and char? (default=false)
|
||||
* @return array Returns assoc array where keys are placeholders substituted in $str and values are escaped characters.
|
||||
* @since 3.0.162
|
||||
*
|
||||
*/
|
||||
public function findReplaceEscapeChars(&$str, array $escapeChars, array $options = array()) {
|
||||
|
||||
$defaults = array(
|
||||
'escapePrefix' => '\\',
|
||||
'restoreEscape' => false, // when restoring, also restore escape prefix?
|
||||
'gluePrefix' => '{ESC',
|
||||
'glueSuffix' => '}',
|
||||
'unescapeUnknown' => false,
|
||||
'removeUnknown' => false,
|
||||
);
|
||||
|
||||
$options = array_merge($defaults, $options);
|
||||
$escapePrefix = $options['escapePrefix'];
|
||||
if(strpos($str, $escapePrefix) === false) return array();
|
||||
$escapes = array();
|
||||
$glueSuffix = $options['glueSuffix'];
|
||||
$parts = explode($escapePrefix, $str);
|
||||
$n = 0;
|
||||
|
||||
do {
|
||||
$gluePrefix = $options['gluePrefix'] . $n;
|
||||
} while($this->strpos($str, $gluePrefix) !== false && ++$n);
|
||||
|
||||
$str = array_shift($parts);
|
||||
|
||||
foreach($parts as $key => $part) {
|
||||
|
||||
$len = $this->strlen($part);
|
||||
$char = $len > 0 ? $this->substr($part, 0, 1) : ''; // char being escaped
|
||||
$part = $len > 1 ? $this->substr($part, 1) : ''; // everything after it
|
||||
$charKey = array_search($char, $escapeChars); // find placeholder (glue)
|
||||
|
||||
if($charKey !== false) {
|
||||
// replace escaped char with placeholder ($glue)
|
||||
$glue = $gluePrefix . $charKey . $glueSuffix;
|
||||
$escapes[$glue] = $options['keepEscapePrefix'] ? $escapePrefix . $char : $char;
|
||||
$str .= $glue . $part;
|
||||
} else if($options['unescapeUnknown']) {
|
||||
// unescape unknown escape char
|
||||
$str .= $char . $part;
|
||||
} else if($options['removeUnknown']) {
|
||||
// remove unknown escape char
|
||||
$str .= $part;
|
||||
} else {
|
||||
// some other backslash that’s allowed, restore back as it was
|
||||
$str .= $escapePrefix . $char . $part;
|
||||
}
|
||||
}
|
||||
|
||||
return $escapes;
|
||||
}
|
||||
|
||||
/***********************************************************************************************************
|
||||
* MULTIBYTE PHP STRING FUNCTIONS THAT FALLBACK WHEN MBSTRING NOT AVAILABLE
|
||||
|
Reference in New Issue
Block a user