'',
        'stack' => array(),
        'valuestack' => array(),
        'lv' => 0,
        'isf' => 0,
        'isf_reason' => '',
        'value' => null,
        'method' => false,
        'params' => array(),
        'pt' => array(),
        'rt' => '',
    );

    /**
     * Maps every known xml-rpc element name (upper-cased) to the list of elements which are allowed to be its
     * direct parent. It is used to validate the nesting of all elements which are not the top-level one.
     *
     * @var array[]
     */
    protected $xmlrpc_valid_parents = array(
        'VALUE' => array('MEMBER', 'DATA', 'PARAM', 'FAULT'),
        'BOOLEAN' => array('VALUE'),
        'I4' => array('VALUE'),
        'I8' => array('VALUE'),
        'EX:I8' => array('VALUE'),
        'INT' => array('VALUE'),
        'STRING' => array('VALUE'),
        'DOUBLE' => array('VALUE'),
        'DATETIME.ISO8601' => array('VALUE'),
        'BASE64' => array('VALUE'),
        'MEMBER' => array('STRUCT'),
        'NAME' => array('MEMBER'),
        'DATA' => array('ARRAY'),
        'ARRAY' => array('VALUE'),
        'STRUCT' => array('VALUE'),
        'PARAM' => array('PARAMS'),
        'METHODNAME' => array('METHODCALL'),
        'PARAMS' => array('METHODCALL', 'METHODRESPONSE'),
        'FAULT' => array('METHODRESPONSE'),
        'NIL' => array('VALUE'), // only used when extension activated
        'EX:NIL' => array('VALUE'), // only used when extension activated
    );

    /** @var array $parsing_options the options received in the constructor */
    protected $parsing_options = array();

    /** @var int $accept self::ACCEPT_REQUEST | self::ACCEPT_RESPONSE by default */
    //protected $accept = 3;

    /** @var int $maxChunkLength 4 MB by default. Any value below 10MB should be good */
    protected $maxChunkLength = 4194304;

    /**
     * @var array
     * Used keys: accept, target_charset, methodname_callback, plus the ones set here.
     * We initialize it partially to help keep BC with subclasses which might have reimplemented `parse()` but not
     * the element handler methods
     */
    protected $current_parsing_options = array(
        'xmlrpc_null_extension' => false,
        'xmlrpc_return_datetimes' => false,
        'xmlrpc_reject_invalid_values' => false
    );

    /**
     * Stores the given options; they are used as defaults for every later parsing call.
     *
     * @param array $options integer keys: options passed to the inner xml parser
     *                       string keys:
     *                       - target_charset (string)
     *                       - methodname_callback (callable)
     *                       - xmlrpc_null_extension (bool)
     *                       - xmlrpc_return_datetimes (bool)
     *                       - xmlrpc_reject_invalid_values (bool)
     */
    public function __construct(array $options = array())
    {
        $this->parsing_options = $options;
    }

    /**
     * Parses an xml-rpc xml string. Results of the parsing are found in $this->_xh.
     * Logs to the error log any issues which do not cause the parsing to fail.
     *
     * @param string $data
     * @param string $returnType self::RETURN_XMLRPCVALS, self::RETURN_PHP, self::RETURN_EPIVALS
     * @param int $accept a bit-combination of self::ACCEPT_REQUEST, self::ACCEPT_RESPONSE, self::ACCEPT_VALUE
     * @param array $options integer-key options are passed to the xml parser, string-key options are used independently.
     *                       These options are added to options received in the constructor.
     *                       Note that if options xmlrpc_null_extension, xmlrpc_return_datetimes and xmlrpc_reject_invalid_values
     *                       are not set, the default settings from PhpXmlRpc\PhpXmlRpc are used
     * @return array see the definition of $this->_xh for the meaning of the results
     * @throws \Exception this can happen if a callback function is set and it does throw (i.e. we do not catch exceptions)
     *
     * @todo refactor? we could 1. return the parsed data structure, and 2. move $returnType and $accept into options
     * @todo feature-creep make it possible to pass in options overriding usage of PhpXmlRpc::$xmlrpc_XXX_format, so
     *       that parsing will be completely independent of global state. Note that it might incur a small perf hit...
*/
    public function parse($data, $returnType = self::RETURN_XMLRPCVALS, $accept = 3, $options = array())
    {
        // reset the parsing state: a parser instance can be reused for many documents
        $this->_xh = array(
            'ac' => '',
            'stack' => array(),
            'valuestack' => array(),
            'lv' => 0,
            'isf' => 0,
            'isf_reason' => '',
            'value' => null,
            'method' => false, // so we can check later if we got a methodname or not
            'params' => array(),
            'pt' => array(),
            'rt' => '',
        );

        $len = strlen($data);

        // we test for empty documents here to save on resource allocation and simplify the chunked-parsing loop below
        if ($len == 0) {
            $this->_xh['isf'] = 3;
            $this->_xh['isf_reason'] = 'XML error 5: empty document';
            return $this->_xh;
        }

        $this->current_parsing_options = array('accept' => $accept);

        // per-call options take precedence over the ones received in the constructor
        $mergedOptions = $this->parsing_options;
        foreach ($options as $key => $val) {
            $mergedOptions[$key] = $val;
        }

        // string-key options are handled here and removed from the list; what is left is passed to the xml parser
        foreach ($mergedOptions as $key => $val) {
            // q: can php be built without ctype? should we use a regexp?
            if (is_string($key) && !ctype_digit($key)) {
                /// @todo on invalid options, throw/error-out instead of logging an error message?
                switch($key) {
                    case 'target_charset':
                        if (function_exists('mb_convert_encoding')) {
                            $this->current_parsing_options['target_charset'] = $val;
                        } else {
                            $this->getLogger()->error('XML-RPC: ' . __METHOD__ . ": 'target_charset' option is unsupported without mbstring");
                        }
                        break;

                    case 'methodname_callback':
                        if (is_callable($val)) {
                            $this->current_parsing_options['methodname_callback'] = $val;
                        } else {
                            $this->getLogger()->error('XML-RPC: ' . __METHOD__ . ": Callback passed as 'methodname_callback' is not callable");
                        }
                        break;

                    case 'xmlrpc_null_extension':
                    case 'xmlrpc_return_datetimes':
                    case 'xmlrpc_reject_invalid_values':
                        $this->current_parsing_options[$key] = $val;
                        break;

                    default:
                        $this->getLogger()->error('XML-RPC: ' . __METHOD__ . ": unsupported option: $key");
                }
                unset($mergedOptions[$key]);
            }
        }

        // options which were not passed in fall back to the global settings
        if (!isset($this->current_parsing_options['xmlrpc_null_extension'])) {
            $this->current_parsing_options['xmlrpc_null_extension'] = PhpXmlRpc::$xmlrpc_null_extension;
        }
        if (!isset($this->current_parsing_options['xmlrpc_return_datetimes'])) {
            $this->current_parsing_options['xmlrpc_return_datetimes'] = PhpXmlRpc::$xmlrpc_return_datetimes;
        }
        if (!isset($this->current_parsing_options['xmlrpc_reject_invalid_values'])) {
            $this->current_parsing_options['xmlrpc_reject_invalid_values'] = PhpXmlRpc::$xmlrpc_reject_invalid_values;
        }

        // NB: we use '' instead of null to force charset detection from the xml declaration
        $parser = xml_parser_create('');

        foreach ($mergedOptions as $key => $val) {
            xml_parser_set_option($parser, $key, $val);
        }

        // always set this, in case someone tries to disable it via options...
        xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 1);

        // NB: handlers are registered as array callables instead of using xml_set_object() plus bare method names:
        // the latter form is deprecated as of php 8.4, whereas array callables are accepted by all php versions
        switch ($returnType) {
            case self::RETURN_PHP:
                xml_set_element_handler($parser, array($this, 'xmlrpc_se'), array($this, 'xmlrpc_ee_fast'));
                break;
            case self::RETURN_EPIVALS:
                xml_set_element_handler($parser, array($this, 'xmlrpc_se'), array($this, 'xmlrpc_ee_epi'));
                break;
            /// @todo log an error / throw / error-out on unsupported return type
            case XMLParser::RETURN_XMLRPCVALS:
            default:
                xml_set_element_handler($parser, array($this, 'xmlrpc_se'), array($this, 'xmlrpc_ee'));
        }

        xml_set_character_data_handler($parser, array($this, 'xmlrpc_cd'));
        xml_set_default_handler($parser, array($this, 'xmlrpc_dh'));

        try {
            // @see ticket #70 - we have to parse big xml docs in chunks to avoid errors
            for ($offset = 0; $offset < $len; $offset += $this->maxChunkLength) {
                $chunk = substr($data, $offset, $this->maxChunkLength);
                // error handling: xml not well formed
                // (the 3rd argument tells the xml parser whether this is the last chunk of the document)
                if (!xml_parse($parser, $chunk, $offset + $this->maxChunkLength >= $len)) {
                    $errCode = xml_get_error_code($parser);
                    $errStr = sprintf('XML error %s: %s at line %d, column %d', $errCode, xml_error_string($errCode),
                        xml_get_current_line_number($parser), xml_get_current_column_number($parser));
                    $this->_xh['isf'] = 3;
                    $this->_xh['isf_reason'] = $errStr;
                }
                // no need to parse further if we already have a fatal error
                if ($this->_xh['isf'] >= 2) {
                    break;
                }
            }
        /// @todo bump minimum php version to 5.5 and use a finally clause instead of doing cleanup 3 times
        } catch (\Exception $e) {
            xml_parser_free($parser);
            $this->current_parsing_options = array();
            /// @todo should we set $this->_xh['isf'] and $this->_xh['isf_reason'] ?
            throw $e;
        } catch (\Error $e) {
            xml_parser_free($parser);
            $this->current_parsing_options = array();
            //$this->accept = $prevAccept;
            /// @todo should we set $this->_xh['isf'] and $this->_xh['isf_reason'] ?
            throw $e;
        }

        xml_parser_free($parser);
        $this->current_parsing_options = array();

        return $this->_xh;
    }

    /**
     * xml parser handler function for opening element tags.
     * Validates the nesting of the element and prepares the parsing state for the data which will follow.
     * On invalid input it sets $this->_xh['isf'] to 2 and $this->_xh['isf_reason'] to a description of the problem.
     * @internal
     *
     * @param resource $parser
     * @param string $name upper-cased element name (case folding is always enabled by parse())
     * @param array $attrs
     * @param bool $acceptSingleVals DEPRECATED use the $accept parameter instead
     * @return void
     *
     * @todo optimization creep: throw when setting $this->_xh['isf'] > 1, to completely avoid further xml parsing
     *       and remove the checking for $this->_xh['isf'] >= 2 everywhere
     */
    public function xmlrpc_se($parser, $name, $attrs, $acceptSingleVals = false)
    {
        // if invalid xml-rpc already detected, skip all processing
        if ($this->_xh['isf'] >= 2) {
            return;
        }

        // check for correct element nesting
        if (count($this->_xh['stack']) == 0) {
            // top level element: it must be one of the element types enabled via the 'accept' bitmask
            /// @todo optimization creep: save this check into a bool variable, instead of using count() every time:
            ///       there is only a single top level element in xml anyway

            // BC
            if ($acceptSingleVals === false) {
                $accept = $this->current_parsing_options['accept'];
            } else {
                $this->logDeprecation('Using argument $acceptSingleVals for method ' . __METHOD__ . ' is deprecated');
                $accept = self::ACCEPT_REQUEST | self::ACCEPT_RESPONSE | self::ACCEPT_VALUE;
            }
            if (($name == 'METHODCALL' && ($accept & self::ACCEPT_REQUEST)) ||
                ($name == 'METHODRESPONSE' && ($accept & self::ACCEPT_RESPONSE)) ||
                ($name == 'VALUE' && ($accept & self::ACCEPT_VALUE)) ||
                ($name == 'FAULT' && ($accept & self::ACCEPT_FAULT))) {
                $this->_xh['rt'] = strtolower($name);
            } else {
                $this->_xh['isf'] = 2;
                $this->_xh['isf_reason'] = 'missing top level xmlrpc element. Found: ' . $name;

                return;
            }
        } else {
            // not top level element: see if parent is OK
            $parent = end($this->_xh['stack']);
            if (!array_key_exists($name, $this->xmlrpc_valid_parents) || !in_array($parent, $this->xmlrpc_valid_parents[$name])) {
                $this->_xh['isf'] = 2;
                $this->_xh['isf_reason'] = "xmlrpc element $name cannot be child of $parent";

                return;
            }
        }

        switch ($name) {
            // optimize for speed switch cases: most common cases first
            case 'VALUE':
                /// @todo we could check for 2 VALUE elements inside a MEMBER or PARAM element
                $this->_xh['vt'] = 'value'; // indicator: no value found yet
                $this->_xh['ac'] = '';
                $this->_xh['lv'] = 1;
                $this->_xh['php_class'] = null;
                break;

            case 'I8':
            case 'EX:I8':
                if (PHP_INT_SIZE === 4) {
                    // INVALID ELEMENT: RAISE ISF so that it is later recognized!!!
                    $this->_xh['isf'] = 2;
                    $this->_xh['isf_reason'] = "Received i8 element but php is compiled in 32 bit mode";

                    return;
                }
                // fall through voluntarily

            case 'I4':
            case 'INT':
            case 'STRING':
            case 'BOOLEAN':
            case 'DOUBLE':
            case 'DATETIME.ISO8601':
            case 'BASE64':
                if ($this->_xh['vt'] != 'value') {
                    // two data elements inside a value: an error occurred!
                    $this->_xh['isf'] = 2;
                    $this->_xh['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value";

                    return;
                }
                $this->_xh['ac'] = ''; // reset the accumulator
                break;

            case 'STRUCT':
            case 'ARRAY':
                if ($this->_xh['vt'] != 'value') {
                    // two data elements inside a value: an error occurred!
                    $this->_xh['isf'] = 2;
                    $this->_xh['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value";

                    return;
                }
                // create an empty array to hold child values, and push it onto appropriate stack
                $curVal = array(
                    'values' => array(),
                    'type' => $name,
                );
                // check for out-of-band information to rebuild php objs and, in case it is found, save it
                if (isset($attrs['PHP_CLASS'])) {
                    $curVal['php_class'] = $attrs['PHP_CLASS'];
                }
                $this->_xh['valuestack'][] = $curVal;
                $this->_xh['vt'] = 'data'; // be prepared for a data element next
                break;

            case 'DATA':
                if ($this->_xh['vt'] != 'data') {
                    // two data elements inside a value: an error occurred!
                    $this->_xh['isf'] = 2;
                    $this->_xh['isf_reason'] = "found two data elements inside an array element";

                    return;
                }
                // fall through voluntarily: nothing else to do for a valid DATA element

            case 'METHODCALL':
            case 'METHODRESPONSE':
            case 'PARAMS':
                // valid elements that add little to processing
                break;

            case 'METHODNAME':
            case 'NAME':
                /// @todo we could check for 2 NAME elements inside a MEMBER element
                $this->_xh['ac'] = '';
                break;

            case 'FAULT':
                $this->_xh['isf'] = 1;
                break;

            case 'MEMBER':
                // set member name to null, in case we do not find in the xml later on
                $this->_xh['valuestack'][count($this->_xh['valuestack']) - 1]['name'] = null;
                //$this->_xh['ac']='';
                // Drop through intentionally

            case 'PARAM':
                // clear value type, so we can check later if no value has been passed for this param/member
                $this->_xh['vt'] = null;
                break;

            case 'NIL':
            case 'EX:NIL':
                if ($this->current_parsing_options['xmlrpc_null_extension']) {
                    if ($this->_xh['vt'] != 'value') {
                        // two data elements inside a value: an error occurred!
                        $this->_xh['isf'] = 2;
                        $this->_xh['isf_reason'] = "$name element following a {$this->_xh['vt']} element inside a single value";

                        return;
                    }
                    // reset the accumulator - q: is this necessary at all here? we don't use it on _ee anyway for NILs
                    $this->_xh['ac'] = '';
                } else {
                    $this->_xh['isf'] = 2;
                    $this->_xh['isf_reason'] = 'Invalid NIL value received. Support for NIL can be enabled via \\PhpXmlRpc\\PhpXmlRpc::$xmlrpc_null_extension';

                    return;
                }
                break;

            default:
                // INVALID ELEMENT: RAISE ISF so that it is later recognized
                /// @todo feature creep = allow a callback instead
                $this->_xh['isf'] = 2;
                $this->_xh['isf_reason'] = "found not-xmlrpc xml element $name";

                return;
        }

        // Save current element name to stack, to validate nesting
        $this->_xh['stack'][] = $name;

        /// @todo optimization creep: move this inside the big switch() above
        if ($name != 'VALUE') {
            $this->_xh['lv'] = 0;
        }
    }

    /**
     * xml parser handler function for close element tags.
     * Converts the character data accumulated for the element into the parsed value, and stores it in $this->_xh.
     * @internal
     *
     * @param resource $parser
     * @param string $name upper-cased element name
     * @param int $rebuildXmlrpcvals >0 for rebuilding xmlrpcvals, 0 for rebuilding php values, <0 for xmlrpc-extension compatibility
     * @return void
     * @throws \Exception this can happen if a callback function is set and it does throw (i.e. we do not catch exceptions)
     *
     * @todo optimization creep: throw when setting $this->_xh['isf'] > 1, to completely avoid further xml parsing
     *       and remove the checking for $this->_xh['isf'] >= 2 everywhere
     */
    public function xmlrpc_ee($parser, $name, $rebuildXmlrpcvals = 1)
    {
        if ($this->_xh['isf'] >= 2) {
            return;
        }

        // pop this element name from stack
        // NB: if XML validates, correct opening/closing is guaranteed and we do not have to check for $name == $currElem.
        // we also checked for proper nesting at start of elements...
        $currElem = array_pop($this->_xh['stack']);

        switch ($name) {
            case 'VALUE':
                // If no scalar element was found inside the VALUE element, it was a string value
                if ($this->_xh['vt'] == 'value') {
                    $this->_xh['value'] = $this->_xh['ac'];
                    $this->_xh['vt'] = Value::$xmlrpcString;
                }

                // in case there is charset conversion required, do it here, to catch both cases of string values
                if (isset($this->current_parsing_options['target_charset']) && $this->_xh['vt'] === Value::$xmlrpcString) {
                    $this->_xh['value'] = mb_convert_encoding($this->_xh['value'], $this->current_parsing_options['target_charset'], 'UTF-8');
                }

                if ($rebuildXmlrpcvals > 0) {
                    // build the xml-rpc val out of the data received, and substitute it
                    $temp = new Value($this->_xh['value'], $this->_xh['vt']);
                    // in case we got info about underlying php class, save it in the object we're rebuilding
                    if (isset($this->_xh['php_class'])) {
                        $temp->_php_class = $this->_xh['php_class'];
                    }
                    $this->_xh['value'] = $temp;
                } elseif ($rebuildXmlrpcvals < 0) {
                    // xmlrpc-extension compatibility: only datetime and base64 values get wrapped into objects
                    if ($this->_xh['vt'] == Value::$xmlrpcDateTime) {
                        $this->_xh['value'] = (object)array(
                            'xmlrpc_type' => 'datetime',
                            'scalar' => $this->_xh['value'],
                            'timestamp' => \PhpXmlRpc\Helper\Date::iso8601Decode($this->_xh['value'])
                        );
                    } elseif ($this->_xh['vt'] == Value::$xmlrpcBase64) {
                        $this->_xh['value'] = (object)array(
                            'xmlrpc_type' => 'base64',
                            'scalar' => $this->_xh['value']
                        );
                    }
                } else {
                    /// @todo this should handle php-serialized objects, since std deserializing is done
                    ///       by php_xmlrpc_decode, which we will not be calling...
                    //if (isset($this->_xh['php_class'])) {
                    //}
                }

                // check if we are inside an array or struct:
                // if value just built is inside an array, let's move it into array on the stack
                $vscount = count($this->_xh['valuestack']);
                if ($vscount && $this->_xh['valuestack'][$vscount - 1]['type'] == 'ARRAY') {
                    $this->_xh['valuestack'][$vscount - 1]['values'][] = $this->_xh['value'];
                }
                break;

            case 'STRING':
                $this->_xh['vt'] = Value::$xmlrpcString;
                $this->_xh['lv'] = 3; // indicate we've found a value
                $this->_xh['value'] = $this->_xh['ac'];
                break;

            case 'BOOLEAN':
                $this->_xh['vt'] = Value::$xmlrpcBoolean;
                $this->_xh['lv'] = 3; // indicate we've found a value
                // We translate boolean 1 or 0 into PHP constants true or false. Strings 'true' and 'false' are accepted,
                // even though the spec never mentions them (see e.g. Blogger api docs)
                // NB: this simple checks helps a lot sanitizing input, i.e. no security problems around here
                // Note the non-strict type check: it will allow ' 1 '
                /// @todo feature-creep: use a flexible regexp, the same as we do with int, double and datetime.
                ///       Note that using a regexp would also make this test less sensitive to phpunit shenanigans, and
                ///       to changes in the way php compares strings (since 8.0, leading and trailing newlines are
                ///       accepted when deciding if a string numeric...)
                if ($this->_xh['ac'] == '1' || strcasecmp($this->_xh['ac'], 'true') === 0) {
                    $this->_xh['value'] = true;
                } else {
                    // log if receiving something strange, even though we set the value to false anyway
                    /// @todo to be consistent with the other types, we should return a value outside the good-value domain, e.g. NULL
                    if ($this->_xh['ac'] != '0' && strcasecmp($this->_xh['ac'], 'false') !== 0) {
                        if (!$this->handleParsingError('invalid data received in BOOLEAN value: ' .
                            $this->truncateValueForLog($this->_xh['ac']), __METHOD__)) {
                            return;
                        }
                    }
                    $this->_xh['value'] = false;
                }
                break;

            case 'EX:I8':
                $name = 'i8';
                // fall through voluntarily
            case 'I4':
            case 'I8':
            case 'INT':
                // NB: we build the Value object with the original xml element name found, except for ex:i8. The
                // `Value::scalarTyp()` function will do some normalization of the data
                $this->_xh['vt'] = strtolower($name);
                $this->_xh['lv'] = 3; // indicate we've found a value
                if (!preg_match(PhpXmlRpc::$xmlrpc_int_format, $this->_xh['ac'])) {
                    if (!$this->handleParsingError('non numeric data received in INT value: ' .
                        $this->truncateValueForLog($this->_xh['ac']), __METHOD__)) {
                        return;
                    }
                    /// @todo: find a better way of reporting an error value than this! Use NaN?
                    $this->_xh['value'] = 'ERROR_NON_NUMERIC_FOUND';
                } else {
                    // it's ok, add it on
                    $this->_xh['value'] = (int)$this->_xh['ac'];
                }
                break;

            case 'DOUBLE':
                $this->_xh['vt'] = Value::$xmlrpcDouble;
                $this->_xh['lv'] = 3; // indicate we've found a value
                if (!preg_match(PhpXmlRpc::$xmlrpc_double_format, $this->_xh['ac'])) {
                    if (!$this->handleParsingError('non numeric data received in DOUBLE value: ' .
                        $this->truncateValueForLog($this->_xh['ac']), __METHOD__)) {
                        return;
                    }

                    $this->_xh['value'] = 'ERROR_NON_NUMERIC_FOUND';
                } else {
                    // it's ok, add it on
                    $this->_xh['value'] = (double)$this->_xh['ac'];
                }
                break;

            case 'DATETIME.ISO8601':
                $this->_xh['vt'] = Value::$xmlrpcDateTime;
                $this->_xh['lv'] = 3; // indicate we've found a value
                if (!preg_match(PhpXmlRpc::$xmlrpc_datetime_format, $this->_xh['ac'])) {
                    if (!$this->handleParsingError('invalid data received in DATETIME value: ' .
                        $this->truncateValueForLog($this->_xh['ac']), __METHOD__)) {
                        return;
                    }
                }
                if ($this->current_parsing_options['xmlrpc_return_datetimes']) {
                    try {
                        $this->_xh['value'] = new \DateTime($this->_xh['ac']);

                    // the default regex used to validate the date string a few lines above should make this case impossible,
                    // but one never knows...
                    } catch(\Exception $e) {
                        // what to do? We can not guarantee that a valid date can be created. We return null...
                        if (!$this->handleParsingError('invalid data received in DATETIME value. Error ' .
                            $e->getMessage(), __METHOD__)) {
                            return;
                        }
                        // ...and we have to set it explicitly, otherwise the value of the previously parsed
                        // element would be reused for this one
                        $this->_xh['value'] = null;
                    }
                } else {
                    $this->_xh['value'] = $this->_xh['ac'];
                }
                break;

            case 'BASE64':
                $this->_xh['vt'] = Value::$xmlrpcBase64;
                $this->_xh['lv'] = 3; // indicate we've found a value
                if ($this->current_parsing_options['xmlrpc_reject_invalid_values']) {
                    // strict decoding: any character outside the base64 alphabet makes the whole document invalid
                    $v = base64_decode($this->_xh['ac'], true);
                    if ($v === false) {
                        $this->_xh['isf'] = 2;
                        $this->_xh['isf_reason'] = 'Invalid data received in BASE64 value: '. $this->truncateValueForLog($this->_xh['ac']);
                        return;
                    }
                } else {
                    $v = base64_decode($this->_xh['ac']);
                    if ($v === '' && $this->_xh['ac'] !== '') {
                        // only the empty string should decode to the empty string
                        $this->getLogger()->error('XML-RPC: ' . __METHOD__ . ': invalid data received in BASE64 value: ' .
                            $this->truncateValueForLog($this->_xh['ac']));
                    }
                }
                $this->_xh['value'] = $v;
                break;

            case 'NAME':
                $this->_xh['valuestack'][count($this->_xh['valuestack']) - 1]['name'] = $this->_xh['ac'];
                break;

            case 'MEMBER':
                // add to array in the stack the last element built, unless no VALUE or no NAME were found
                if ($this->_xh['vt']) {
                    $vscount = count($this->_xh['valuestack']);
                    if ($this->_xh['valuestack'][$vscount - 1]['name'] === null) {
                        if (!$this->handleParsingError('missing NAME inside STRUCT in received xml', __METHOD__)) {
                            return;
                        }
                        $this->_xh['valuestack'][$vscount - 1]['name'] = '';
                    }
                    $this->_xh['valuestack'][$vscount - 1]['values'][$this->_xh['valuestack'][$vscount - 1]['name']] = $this->_xh['value'];
                } else {
                    if (!$this->handleParsingError('missing VALUE inside STRUCT in received xml', __METHOD__)) {
                        return;
                    }
                }
                break;

            case 'DATA':
                $this->_xh['vt'] = null; // reset this to check for 2 data elements in a row - even if they're empty
                break;

            case 'STRUCT':
            case 'ARRAY':
                // fetch out of stack array of values, and promote it to current value
                $currVal = array_pop($this->_xh['valuestack']);
                $this->_xh['value'] = $currVal['values'];
                $this->_xh['vt'] = strtolower($name);
                if (isset($currVal['php_class'])) {
                    $this->_xh['php_class'] = $currVal['php_class'];
                }
                break;

            case 'PARAM':
                // add to array of params the current value, unless no VALUE was found
                /// @todo should we also check if there were two VALUE inside the PARAM?
                if ($this->_xh['vt']) {
                    $this->_xh['params'][] = $this->_xh['value'];
                    $this->_xh['pt'][] = $this->_xh['vt'];
                } else {
                    if (!$this->handleParsingError('missing VALUE inside PARAM in received xml', __METHOD__)) {
                        return;
                    }
                }
                break;

            case 'METHODNAME':
                if (!preg_match(PhpXmlRpc::$xmlrpc_methodname_format, $this->_xh['ac'])) {
                    if (!$this->handleParsingError('invalid data received in METHODNAME: '.
                        $this->truncateValueForLog($this->_xh['ac']), __METHOD__)) {
                        return;
                    }
                }
                $methodName = trim($this->_xh['ac']);
                $this->_xh['method'] = $methodName;
                // we allow the callback to f.e. give us back a mangled method name by manipulating $this
                if (isset($this->current_parsing_options['methodname_callback'])) {
                    call_user_func($this->current_parsing_options['methodname_callback'], $methodName, $this, $parser);
                }
                break;

            case 'NIL':
            case 'EX:NIL':
                // NB: if NIL support is not enabled, parsing stops at element start. So this If is redundant
                //if ($this->current_parsing_options['xmlrpc_null_extension']) {
                    $this->_xh['vt'] = 'null';
                    $this->_xh['value'] = null;
                    $this->_xh['lv'] = 3;
                //}
                break;

            /// @todo add extra checking:
            ///       - METHODRESPONSE should contain either a PARAMS with a single PARAM, or a FAULT
            ///       - FAULT should contain a single struct with the 2 expected members (check their name and type)
            ///       - METHODCALL should contain a methodname
            case 'PARAMS':
            case 'FAULT':
            case 'METHODCALL':
            case 'METHODRESPONSE':
                break;

            default:
                // End of INVALID ELEMENT
                // Should we add an assert here for unreachable code? When an invalid element is found in xmlrpc_se,
                // $this->_xh['isf'] is set to 2...
                break;
        }
    }

    /**
     * Used in decoding xml-rpc requests/responses without rebuilding xml-rpc Values.
     * @internal
     *
     * @param resource $parser
     * @param string $name
     * @return void
     */
    public function xmlrpc_ee_fast($parser, $name)
    {
        $this->xmlrpc_ee($parser, $name, 0);
    }

    /**
     * Used in decoding xml-rpc requests/responses while building xmlrpc-extension Values (plain php for all but base64 and datetime).
     * @internal
     *
     * @param resource $parser
     * @param string $name
     * @return void
     */
    public function xmlrpc_ee_epi($parser, $name)
    {
        $this->xmlrpc_ee($parser, $name, -1);
    }

    /**
     * xml parser handler function for character data.
     * Appends the data received to the accumulator $this->_xh['ac'].
     * @internal
     *
     * @param resource $parser
     * @param string $data
     * @return void
     */
    public function xmlrpc_cd($parser, $data)
    {
        // skip processing if xml fault already detected
        if ($this->_xh['isf'] >= 2) {
            return;
        }

        // "lookforvalue == 3" means that we've found an entire value and should discard any further character data
        if ($this->_xh['lv'] != 3) {
            $this->_xh['ac'] .= $data;
        }
    }

    /**
     * xml parser handler function for 'other stuff', i.e. not char data or element start/end tag.
     * In fact, it only gets called on unknown entities...
     * @internal
     *
     * @param resource $parser
     * @param string $data
     * @return void
     */
    public function xmlrpc_dh($parser, $data)
    {
        // skip processing if xml fault already detected
        if ($this->_xh['isf'] >= 2) {
            return;
        }

        // only data which looks like an entity reference is kept: it is added verbatim to the accumulator
        if (substr($data, 0, 1) == '&' && substr($data, -1, 1) == ';') {
            $this->_xh['ac'] .= $data;
        }
    }

    /**
     * xml charset encoding guessing helper function.
     * Tries to determine the charset encoding of an XML chunk received over HTTP.
     * NB: according to the spec (RFC 3023), if text/xml content-type is received over HTTP without a charset,
     * we SHOULD assume it is strictly US-ASCII. But we try to be more tolerant of non-conforming (legacy?) clients/servers,
     * which will be most probably using UTF-8 anyway...
     * In order of importance checks:
     * 1. http headers
     * 2. BOM
     * 3. XML declaration
     * 4.
guesses using mb_detect_encoding()
     *
     * @param string $httpHeader the http Content-type header
     * @param string $xmlChunk xml content buffer
     * @param string $encodingPrefs comma separated list of character encodings to be used as default (when mb extension is enabled).
     *                              This can also be set globally using PhpXmlRpc::$xmlrpc_detectencodings
     * @return string|false the encoding determined. When it can not be determined from headers, BOM or xml declaration:
     *                      the result of mb_detect_encoding() if mbstring is enabled (which is false when detection
     *                      fails), PhpXmlRpc::$xmlrpc_defencoding if mbstring is not enabled
     *
     * @todo explore usage of mb_http_input(): does it detect http headers + post data? if so, use it instead of hand-detection!!!
     * @todo feature-creep make it possible to pass in options overriding usage of PhpXmlRpc static variables, to make
     *       the method independent of global state
     */
    public static function guessEncoding($httpHeader = '', $xmlChunk = '', $encodingPrefs = null)
    {
        // discussion: see http://www.yale.edu/pclt/encoding/
        // 1 - test if encoding is specified in HTTP HEADERS

        // Details:
        // LWS:           (\13\10)?( |\t)+
        // token:         (any char but excluded stuff)+
        // quoted string: " (any char but double quotes and control chars)* "
        // header:        Content-type = ...; charset=value(; ...)*
        //   where value is of type token, no LWS allowed between 'charset' and value
        // Note: we do not check for invalid chars in VALUE:
        //   this had better be done using pure ereg as below
        // Note 2: we might be removing whitespace/tabs that ought to be left in if
        //   the received charset is a quoted string. But nobody uses such charset names...

        /// @todo this test will pass if ANY header has charset specification, not only Content-Type. Fix it?
        // NB: the charset value stops at the end of the header line, so that when many headers are passed in at once
        // the ones following the charset specification do not end up in the returned value
        $matches = array();
        if (preg_match('/;\s*charset\s*=([^;\r\n]+)/i', $httpHeader, $matches)) {
            return strtoupper(trim($matches[1], " \t\""));
        }

        // 2 - scan the first bytes of the data for a UTF-16 (or other) BOM pattern
        //     (source: http://www.w3.org/TR/2000/REC-xml-20001006)
        //     NOTE: actually, according to the spec, even if we find the BOM and determine
        //     an encoding, we should check if there is an encoding specified
        //     in the xml declaration, and verify if they match.
        /// @todo implement check as described above?
        /// @todo implement check for first bytes of string even without a BOM? (It sure looks harder than for cases WITH a BOM)
        // NB: the 4-bytes BOMs have to be tested first, as some of them start with the same bytes as the UTF-16 ones
        if (preg_match('/^(?:\x00\x00\xFE\xFF|\xFF\xFE\x00\x00|\x00\x00\xFF\xFE|\xFE\xFF\x00\x00)/', $xmlChunk)) {
            return 'UCS-4';
        } elseif (preg_match('/^(?:\xFE\xFF|\xFF\xFE)/', $xmlChunk)) {
            return 'UTF-16';
        } elseif (preg_match('/^(?:\xEF\xBB\xBF)/', $xmlChunk)) {
            return 'UTF-8';
        }

        // 3 - test if encoding is specified in the xml declaration
        /// @todo this regexp will fail if $xmlChunk uses UTF-32/UCS-4, and most likely UTF-16/UCS-2 as well. In that
        ///       case we leave the guesswork up to mbstring - which seems to be able to detect it, starting with php 5.6.
        ///       For lower versions, we could attempt usage of mb_ereg...
        // Details:
        // SPACE:         (#x20 | #x9 | #xD | #xA)+ === [ \x9\xD\xA]+
        // EQ:            SPACE?=SPACE? === [ \x9\xD\xA]*=[ \x9\xD\xA]*
        if (preg_match('/^<\?xml\s+version\s*=\s*' . "((?:\"[a-zA-Z0-9_.:-]+\")|(?:'[a-zA-Z0-9_.:-]+'))" .
            '\s+encoding\s*=\s*' . "((?:\"[A-Za-z][A-Za-z0-9._-]*\")|(?:'[A-Za-z][A-Za-z0-9._-]*'))/",
            $xmlChunk, $matches)) {
            // strip the quotes surrounding the encoding name
            return strtoupper(substr($matches[2], 1, -1));
        }

        // 4 - if mbstring is available, let it do the guesswork
        if (function_exists('mb_detect_encoding')) {
            if ($encodingPrefs == null && PhpXmlRpc::$xmlrpc_detectencodings != null) {
                $encodingPrefs = PhpXmlRpc::$xmlrpc_detectencodings;
            }
            if ($encodingPrefs) {
                $enc = mb_detect_encoding($xmlChunk, $encodingPrefs);
            } else {
                $enc = mb_detect_encoding($xmlChunk);
            }
            // NB: mb_detect likes to call it ascii, xml parser likes to call it US_ASCII...
            // IANA also likes better US-ASCII, so go with it
            if ($enc == 'ASCII') {
                $enc = 'US-' . $enc;
            }

            return $enc;
        } else {
            // no encoding specified: as per HTTP1.1 assume it is iso-8859-1?
            // Both RFC 2616 (HTTP 1.1) and 1945 (HTTP 1.0) clearly state that for text/xxx content types
            // this should be the standard. And we should be getting text/xml as request and response.
            // BUT we have to be backward compatible with the lib, which always used UTF-8 as default...
            return PhpXmlRpc::$xmlrpc_defencoding;
        }
    }

    /**
     * Helper function: checks if an xml chunk has a charset declaration (BOM or in the xml declaration).
     *
     * @param string $xmlChunk
     * @return bool
     *
     * @todo rename to hasEncodingDeclaration
     */
    public static function hasEncoding($xmlChunk)
    {
        // scan the first bytes of the data for a UTF-16 (or other) BOM pattern
        //     (source: http://www.w3.org/TR/2000/REC-xml-20001006)
        if (preg_match('/^(?:\x00\x00\xFE\xFF|\xFF\xFE\x00\x00|\x00\x00\xFF\xFE|\xFE\xFF\x00\x00)/', $xmlChunk)) {
            return true;
        } elseif (preg_match('/^(?:\xFE\xFF|\xFF\xFE)/', $xmlChunk)) {
            return true;
        } elseif (preg_match('/^(?:\xEF\xBB\xBF)/', $xmlChunk)) {
            return true;
        }

        // test if encoding is specified in the xml declaration
        // Details:
        // SPACE:         (#x20 | #x9 | #xD | #xA)+ === [ \x9\xD\xA]+
        // EQ:            SPACE?=SPACE? === [ \x9\xD\xA]*=[ \x9\xD\xA]*
        if (preg_match('/^<\?xml\s+version\s*=\s*' . "((?:\"[a-zA-Z0-9_.:-]+\")|(?:'[a-zA-Z0-9_.:-]+'))" .
            '\s+encoding\s*=\s*' . "((?:\"[A-Za-z][A-Za-z0-9._-]*\")|(?:'[A-Za-z][A-Za-z0-9._-]*'))/",
            $xmlChunk)) {
            return true;
        }

        return false;
    }

    /**
     * Reports a problem found in the data being parsed: depending on option xmlrpc_reject_invalid_values, it either
     * marks the whole document as invalid (setting 'isf' to 2 in $this->_xh) or just logs an error message.
     *
     * @param string $message
     * @param string $method method/file/line info
     * @return bool false if the caller has to stop parsing
     */
    protected function handleParsingError($message, $method = '')
    {
        if ($this->current_parsing_options['xmlrpc_reject_invalid_values']) {
            $this->_xh['isf'] = 2;
            $this->_xh['isf_reason'] = ucfirst($message);
            return false;
        } else {
            $this->getLogger()->error('XML-RPC: ' . ($method != '' ? $method . ': ' : '') . $message);
            return true;
        }
    }

    /**
     * Truncates unsafe data: strings longer than $this->maxLogValueLength bytes are cut and get '...' appended.
     *
     * @param string $data
     * @return string
     */
    protected function truncateValueForLog($data)
    {
        if (strlen($data) > $this->maxLogValueLength) {
            // NB: a negative length would make substr() count from the end of the string, so we clamp it at 0
            return substr($data, 0, max(0, $this->maxLogValueLength - 3)) . '...';
        }

        return $data;
    }

    // *** BC layer ***

    /**
     * xml parser handler function for opening element tags.
     * Used in decoding xml chunks that might represent single xml-rpc values as well as requests, responses.
     * @deprecated
     *
     * @param resource $parser
     * @param $name
     * @param $attrs
     * @return void
     */
    public function xmlrpc_se_any($parser, $name, $attrs)
    {
        // this will be spamming the log if this method is in use...
        $this->logDeprecation('Method ' . __METHOD__ . ' is deprecated');

        $this->xmlrpc_se($parser, $name, $attrs, true);
    }

    /**
     * BC layer: gives read access (by reference) to the properties which used to be public, logging a deprecation.
     * Access to any other property triggers a warning and returns null.
     *
     * @param string $name
     * @return mixed
     */
    public function &__get($name)
    {
        switch ($name) {
            case '_xh':
            case 'xmlrpc_valid_parents':
                $this->logDeprecation('Getting property XMLParser::' . $name . ' is deprecated');
                return $this->$name;
            default:
                /// @todo throw instead? There are very few other places where the lib trigger errors which can potentially reach stdout...
                $trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 1);
                trigger_error('Undefined property via __get(): ' . $name . ' in ' . $trace[0]['file'] . ' on line ' . $trace[0]['line'], E_USER_WARNING);
                // since the method returns by reference, we need a variable to return
                $result = null;
                return $result;
        }
    }

    /**
     * BC layer: gives write access to the properties which used to be public, logging a deprecation.
     * Setting any other property triggers a warning and has no effect.
     *
     * @param string $name
     * @param mixed $value
     * @return void
     */
    public function __set($name, $value)
    {
        switch ($name) {
            // this should only ever be called by subclasses which overtook `parse()`
            case 'accept':
                $this->logDeprecation('Setting property XMLParser::' . $name . ' is deprecated');
                $this->current_parsing_options['accept'] = $value;
                break;
            case '_xh':
            case 'xmlrpc_valid_parents':
                $this->logDeprecation('Setting property XMLParser::' . $name . ' is deprecated');
                $this->$name = $value;
                break;
            default:
                /// @todo throw instead? There are very few other places where the lib trigger errors which can potentially reach stdout...
                $trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 1);
                trigger_error('Undefined property via __set(): ' . $name . ' in ' . $trace[0]['file'] . ' on line ' . $trace[0]['line'], E_USER_WARNING);
        }
    }

    /**
     * BC layer: isset() support for the properties which used to be public, logging a deprecation.
     *
     * @param string $name
     * @return bool
     */
    public function __isset($name)
    {
        switch ($name) {
            case 'accept':
                $this->logDeprecation('Checking property XMLParser::' . $name . ' is deprecated');
                return isset($this->current_parsing_options['accept']);
            case '_xh':
            case 'xmlrpc_valid_parents':
                $this->logDeprecation('Checking property XMLParser::' . $name . ' is deprecated');
                return isset($this->$name);
            default:
                return false;
        }
    }

    /**
     * BC layer: unset() support for the properties which used to be public, logging a deprecation.
     * Unsetting any other property triggers a warning and has no effect.
     *
     * @param string $name
     * @return void
     */
    public function __unset($name)
    {
        switch ($name) {
            // q: does this make sense at all?
            case 'accept':
                $this->logDeprecation('Unsetting property XMLParser::' . $name . ' is deprecated');
                unset($this->current_parsing_options['accept']);
                break;
            case '_xh':
            case 'xmlrpc_valid_parents':
                $this->logDeprecation('Unsetting property XMLParser::' . $name . ' is deprecated');
                unset($this->$name);
                break;
            default:
                /// @todo throw instead? There are very few other places where the lib trigger errors which can potentially reach stdout...
                $trace = debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS, 1);
                trigger_error('Undefined property via __unset(): ' . $name . ' in ' . $trace[0]['file'] . ' on line ' . $trace[0]['line'], E_USER_WARNING);
        }
    }
}