From f287d04d3ead3c72b9de8badb11be68856b2f6ba Mon Sep 17 00:00:00 2001 From: Steve Clay Date: Sun, 12 Feb 2012 21:07:36 -0500 Subject: [PATCH] Moved ill-advised Java class ports elsewhere --- min/lib/MrClay/Matcher.php | 594 ------------------------- min/lib/MrClay/Pattern.php | 27 -- min_unit_tests/test_MrClay_Matcher.php | 55 --- 3 files changed, 676 deletions(-) delete mode 100644 min/lib/MrClay/Matcher.php delete mode 100644 min/lib/MrClay/Pattern.php delete mode 100644 min_unit_tests/test_MrClay_Matcher.php diff --git a/min/lib/MrClay/Matcher.php b/min/lib/MrClay/Matcher.php deleted file mode 100644 index ba9a1be..0000000 --- a/min/lib/MrClay/Matcher.php +++ /dev/null @@ -1,594 +0,0 @@ - - */ -class MrClay_Matcher { - - /** - * @var MrClay_Pattern The Pattern object that created this Matcher. - */ - protected $parentPattern; - - /** - * @var array The storage used by groups. They may contain invalid values if a group was skipped during the matching. - */ - protected $groups = array(); - - /** - * @var int The beginning of the range within the sequence that is to be matched. Anchors will match at these - * "hard" boundaries. Changing the region changes these values. - */ - protected $from; - - /** - * @var int The end of the range within the sequence that is to be matched. Anchors will match at these - * "hard" boundaries. Changing the region changes these values. - */ - protected $to; - - /** - * @var string The original string being matched. - */ - protected $text; - - /** - * mode used for matching all the input. - */ - const ENDANCHOR = 1; - - /** - * mode used when a match does not have to consume all of the input. - */ - const NOANCHOR = 0; - - /** - * @var int Matcher state used by the last node. NOANCHOR is used when a match does not have to consume - * all of the input. ENDANCHOR is the mode used for matching all the input. - */ - protected $acceptMode = self::NOANCHOR; - - /** - * @var int Beginning of the range of string that last matched the pattern. If the last match failed then $first - * is -1; - */ - protected $first = -1; - - /** - * @var int $last initially holds 0 then it holds the index of the end of the last match (which is where the next - * search starts). - */ - protected $last = 0; - - /** - * @var int The end index of what matched in the last match operation. - */ - protected $oldLast = -1; - - /** - * @var int The index of the last position appended in a substitution. - */ - protected $lastAppendPosition = 0; - - /** - * @var bool Boolean indicating whether or not more input could change the results of the last match. - * - * If hitEnd is true, and a match was found, then more input might cause a different match to be found. - * If hitEnd is true and a match was not found, then more input could cause a match to be found. - * If hitEnd is false and a match was found, then more input will not change the match. - * If hitEnd is false and a match was not found, then more input will not cause a match to be found. - */ - protected $hitEnd; - - /** - * @var bool Boolean indicating whether or not more input could change a positive match into a negative one. - * - * If requireEnd is true, and a match was found, then more input could cause the match to be lost. - * If requireEnd is false and a match was found, then more input might change the match but the match won't be lost. - * If a match was not found, then requireEnd has no meaning. - */ - protected $requireEnd; - - /** - * @var bool If transparentBounds is true then the boundaries of this matcher's region are transparent to - * lookahead, lookbehind, and boundary matching constructs that try to see beyond them. - */ - protected $transparentBounds = false; - - /** - * @var bool If anchoringBounds is true then the boundaries of this matcher's region match anchors such as ^ and $. - */ - protected $anchoringBounds = true; - - /** - * @param MrClay_Pattern $parent - * @param string $text - */ - public function __construct(MrClay_Pattern $parent, $text) - { - $this->parentPattern = $parent; - $this->text = $text; - - // Put fields into initial states - $this->reset(); - } - - /** - * Returns the pattern that is interpreted by this matcher. - * @return MrClay_Pattern - */ - public function pattern() - { - return $this->parentPattern; - } - - /** - * Returns the match state of this matcher - * @return MrClay_MatchResult - */ - public function toMatchResult() - { - // @todo - } - - /** - * Changes the Pattern that this Matcher uses to find matches with. - * @param MrClay_Pattern $newPattern - * @return MrClay_Matcher - */ - public function usePattern(MrClay_Pattern $newPattern) - { - $this->parentPattern = $newPattern; - $this->groups = array(); - return $this; - } - - /** - * Attempts to match the entire region against the pattern. - * @return bool - */ - public function matches() - { - return $this->match($this->from, self::ENDANCHOR); - } - - /** - * Attempts to find the next subsequence of the input sequence that matches the pattern. If $start is specified, - * this resets this matcher and then attempts to find the next subsequence of the input sequence that matches the - * pattern, starting at the specified index. - * @param int $start - * @return bool - */ - public function find($start = null) - { - if ($start !== null) { - $this->reset(); - return $this->search($start); - } - - $nextSearchIndex = $this->last; - if ($nextSearchIndex == $this->first) - $nextSearchIndex++; - - // If next search starts before region, start it at region - if ($nextSearchIndex < $this->from) - $nextSearchIndex = $this->from; - - // If next search starts beyond region then it fails - if ($nextSearchIndex > $this->to) { - $this->groups = array(); - return false; - } - return $this->search($nextSearchIndex); - } - - /** - * Resets this matcher with a new input sequence. - * @param $input - * @return MrClay_Matcher - */ - public function reset($input = null) - { - if ($input !== null) { - $this->text = $input; - } - $this->first = -1; - $this->last = 0; - $this->oldLast = -1; - $this->groups = array(); - $this->lastAppendPosition = 0; - $this->from = 0; - $this->to = strlen($this->text); - return $this; - } - - /** - * Returns the start index of the previous match. If $group is given, this returns the start index of the - * subsequence captured by the given group during the previous match operation. - * @param int $group - * @return int - */ - public function start($group = null) - { - if ($group !== null) { - return isset($this->groups[$group * 2]) ? $this->groups[$group * 2] : -1; - } - return $this->first; - } - - /** - * Returns the offset after the last character matched. If $group is given, this returns the offset after the last - * character of the subsequence captured by the given group during the previous match operation - * @param int $group - * @return int - */ - public function end($group = null) - { - if ($group !== null) { - return isset($this->groups[$group * 2 + 1]) ? $this->groups[$group * 2 + 1] : -1; - } - return $this->last; - } - - /** - * Returns the input subsequence matched by the previous match. If $group is given, this returns the input - * subsequence captured by the given group during the previous match operation. - * @param int $group - * @return string - */ - public function group($group = null) - { - if ($group !== null) { - return substr( - $this->text, - $this->groups[$group * 2], - ($this->groups[$group * 2 + 1] - $this->groups[$group * 2])); - } - return $this->group(0); - } - - /** - * Returns the number of capturing groups in this matcher's pattern. - * @return int - */ - public function groupCount() - { - return $this->parentPattern->capturingGroupCount - 1; - } - - /** - * Attempts to match the input sequence, starting at the beginning of the region, against the pattern. - * @return bool - */ - public function lookingAt() - { - return $this->match($this->from, self::NOANCHOR); - } - - /** - * Returns a literal replacement String for the specified String. - * @param string $s - * @return string - */ - public function quoteReplacement($s) - { - return preg_quote($s, '/'); - } - - /** - * Implements a non-terminal append-and-replace step. - * @param string $sb - * @param string $replacement - * @return MrClay_Matcher - */ - public function appendReplacement(&$sb, $replacement) - { - // If no match, return error - if ($this->first < 0) - throw new RuntimeException("No match available"); - - // Process substitution string to replace group references with groups - $cursor = 0; - $result = ''; - - while ($cursor < strlen($replacement)) { - $nextChar = $replacement[$cursor]; - if ($nextChar == '\\') { - $cursor++; - $nextChar = $replacement[$cursor]; - $result .= $nextChar; - $cursor++; - } else if ($nextChar == '$') { - // Skip past $ - $cursor++; - // The first number is always a group - $refNum = (int) $replacement[$cursor] - '0'; - if (($refNum < 0) || ($refNum > 9)) - throw new InvalidArgumentException( - "Illegal group reference"); - $cursor++; - - // Capture the largest legal group string - $done = false; - while (!$done) { - if ($cursor >= strlen($replacement)) { - break; - } - $nextDigit = $replacement[$cursor] - '0'; - if (($nextDigit < 0) || ($nextDigit > 9)) { // not a number - break; - } - $newRefNum = ($refNum * 10) + $nextDigit; - if ($this->groupCount() < $newRefNum) { - $done = true; - } else { - $refNum = $newRefNum; - $cursor++; - } - } - // Append group - if ($this->start($refNum) != -1 && $this->end($refNum) != -1) - $result .= substr( - $this->text, - $this->start($refNum), - $this->end($refNum) - $this->start($refNum) - ); - } else { - $result .= $nextChar; - $cursor++; - } - } - // Append the intervening text - $sb .= substr($this->text, $this->lastAppendPosition, $this->first - $this->lastAppendPosition); - // Append the match substitution - $sb .= $result; - - $this->lastAppendPosition = $this->last; - return $this; - } - - /** - * Implements a terminal append-and-replace step. - * @param string $sb - * @return string - */ - public function appendTail(&$sb) - { - $sb .= substr($this->text, $this->lastAppendPosition); - return $sb; - } - - /** - * Replaces every subsequence of the input sequence that matches the pattern with the given replacement string. - * @param string $replacement - * @return string - */ - public function replaceAll($replacement) - { - $this->reset(); - $result = $this->find(); - if ($result) { - $sb = ''; - do { - $this->appendReplacement($sb, $replacement); - $result = $this->find(); - } while ($result); - $this->appendTail($sb); - return $sb; - } - return $this->text; - } - - /** - * Replaces the first subsequence of the input sequence that matches the pattern with the given replacement string. - * @param string $replacement - * @return string - */ - public function replaceFirst($replacement) - { - $this->reset(); - if (!$this->find()) - return $this->text; - $sb = ''; - $this->appendReplacement($sb, $replacement); - $this->appendTail($sb); - return $sb; - } - - /** - * Sets the limits of this matcher's region. - * @param int $start - * @param int $end - * @return MrClay_Matcher - */ - public function region($start, $end) - { - $this->reset(); - $this->from = $start; - $this->to = $end; - return $this; - } - - /** - * Reports the start index of this matcher's region. - * @return int - */ - public function regionStart() - { - return $this->from; - } - - /** - * Reports the end index (exclusive) of this matcher's region. - * @return int - */ - public function regionEnd() - { - return $this->to; - } - - /** - * Queries the transparency of region bounds for this matcher. - * @return bool - */ - public function hasTransparentBounds() - { - return $this->transparentBounds; - } - - /** - * Sets the transparency of region bounds for this matcher. - * @param bool $b - * @return MrClay_Matcher - */ - public function useTransparentBounds($b) - { - $this->transparentBounds = $b; - return $this; - } - - /** - * Queries the anchoring of region bounds for this matcher. - * @return bool - */ - public function hasAnchoringBounds() - { - return $this->anchoringBounds; - } - - /** - * Sets the anchoring of region bounds for this matcher. - * @param bool $b - * @return MrClay_Matcher - */ - public function useAnchoringBounds($b) - { - $this->anchoringBounds = $b; - return $this; - } - - /** - * Returns true if the end of input was hit by the search engine in the last match operation performed by this - * matcher. - * @return bool - */ - public function hitEnd() - { - return $this->hitEnd; - } - - /** - * Returns true if more input could change a positive match into a negative one. - * @return bool - */ - public function requireEnd() - { - return $this->requireEnd; - } - - /** - * Initiates a search to find a Pattern within the given bounds. The groups are filled with - * default values and the match of the root of the state machine is called. The state machine - * will hold the state of the match as it proceeds in this matcher. - * - * Matcher.from is not set here, because it is the "hard" boundary of the start of the search - * which anchors will set to. The from param is the "soft" boundary of the start of the search, - * meaning that the regex tries to match at that index but ^ won't match there. Subsequent - * calls to the search methods start at a new "soft" boundary which is the end of the previous match. - * - * @param int $from - * @return bool - */ - protected function search($from) - { - $this->hitEnd = false; - $this->requireEnd = false; - $from = $from < 0 ? 0 : $from; - $this->first = $from; - $this->oldLast = $this->oldLast < 0 ? $from : $this->oldLast; - $this->groups = array(); - $this->acceptMode = self::NOANCHOR; - - if ($this->anchoringBounds) { - // ^ and $ should anchor the beg/end of the string given - $result = preg_match($this->parentPattern->pattern, substr($this->text, $from), $m, PREG_OFFSET_CAPTURE); - if ($result) { - // adjust the offsets to account for sending a substring in - foreach ($m as $k => $match) { - $m[$k][1] = $match[1] + $from; - } - } - } else { - $result = preg_match($this->parentPattern->pattern, $this->text, $m, PREG_OFFSET_CAPTURE, $from); - } - if ($result) { - $this->first = $m[0][1]; - $this->last = $m[0][1] + strlen($m[0][0]); - foreach ($m as $match) { - $this->groups[] = $match[1]; - $this->groups[] = $match[1] + strlen($match[0]); - } - } else { - $this->first = -1; - } - $this->oldLast = $this->last; - return (bool) $result; - } - - /** - * Initiates a search for an anchored match to a Pattern within the given - * bounds. The groups are filled with default values and the match of the - * root of the state machine is called. The state machine will hold the - * state of the match as it proceeds in this matcher. - * - * @param int $from - * @param int $anchor - * @return bool - */ - protected function match($from, $anchor) { - $this->hitEnd = false; - $this->requireEnd = false; - $from = $from < 0 ? 0 : $from; - $this->first = $from; - $this->oldLast = $this->oldLast < 0 ? $from : $this->oldLast; - $this->groups = array(); - $this->acceptMode = $anchor; - - if ($this->anchoringBounds) { - // ^ and $ should anchor the beg/end of the string given - $result = preg_match($this->parentPattern->pattern, substr($this->text, $from), $m, PREG_OFFSET_CAPTURE); - if ($result) { - // adjust the offsets to account for sending a substring in - foreach ($m as $k => $match) { - $m[$k][1] = $match[1] + $from; - } - } - } else { - $result = preg_match($this->parentPattern->pattern, $this->text, $m, PREG_OFFSET_CAPTURE, $from); - } - // require entire match in ENDANCHOR mode - if ($result && ($this->acceptMode === self::ENDANCHOR) && ($m[0][0] !== substr($this->text, $from))) { - $result = false; - } - if ($result) { - $this->first = $m[0][1]; - $this->last = $m[0][1] + strlen($m[0][0]); - foreach ($m as $match) { - $this->groups[] = $match[1]; - $this->groups[] = $match[1] + strlen($match[0]); - } - } else { - $this->first = -1; - } - $this->oldLast = $this->last; - return (bool) $result; - } -} diff --git a/min/lib/MrClay/Pattern.php b/min/lib/MrClay/Pattern.php deleted file mode 100644 index b88fc40..0000000 --- a/min/lib/MrClay/Pattern.php +++ /dev/null @@ -1,27 +0,0 @@ - - */ -class MrClay_Pattern { - public $pattern = ''; - public $capturingGroupCount = 0; - - public function __construct($pattern, $numGroups) - { - $this->pattern = $pattern; - $this->capturingGroupCount = $numGroups; - } - - public function matcher($input) - { - return new MrClay_Matcher($this, $input); - } -} diff --git a/min_unit_tests/test_MrClay_Matcher.php b/min_unit_tests/test_MrClay_Matcher.php deleted file mode 100644 index a067f1f..0000000 --- a/min_unit_tests/test_MrClay_Matcher.php +++ /dev/null @@ -1,55 +0,0 @@ - '/cat/', - 'numGroups' => 0, - 'input' => 'one cat two cats in the yard', - 'replacement' => 'dog', - ), - array( - 'pattern' => '/([0-9a-f])\1([0-9a-f])\2([0-9a-f])\3/', - 'numGroups' => 3, - 'input' => 'Hello World! aa66ff fefe 44ee6677 gg', - 'replacement' => '######', - ), - array( - 'pattern' => '/^([0-9a-f])\1([0-9a-f])\2([0-9a-f])\3/', - 'numGroups' => 3, - 'input' => 'Hello World! aa66ff fefe 44ee6677 gg', - 'replacement' => '######', - ), - array( - 'pattern' => '/([0-9a-f])\1([0-9a-f])\2([0-9a-f])\3/', - 'numGroups' => 3, - 'input' => 'Hello World! aa66ff fefe 44ee6677 gg', - 'replacement' => '$1$1$2$2$3$3', - ), - ); - foreach ($tests as $test) { - $pattern = new MrClay_Pattern($test['pattern'], $test['numGroups']); - $matcher = $pattern->matcher($test['input']); - var_export($matcher->matches()); echo "\n"; - while ($matcher->find()) { - var_export($matcher->group()); - } - $matcher->reset(); echo "\n"; - $sb = ''; - while ($matcher->find()) { - $matcher->appendReplacement($sb, $test['replacement']); - } - $matcher->appendTail($sb); - var_export($sb); - echo "\n\n"; - } -} - -test_MrClay_Matcher();