mirror of
https://github.com/Seldaek/monolog.git
synced 2025-07-30 09:50:26 +02:00
Attempt to recover from json encoding errors
Detect and attempt to recover from json_encode errors triggered by strings containing invalid UTF-8 sequences. Recovery will only be attempted when encoding strings or arrays. If recovery fails then a RuntimeException will be thrown. The recovery process will convert invalid UTF-8 codepoints as though the input string was encoded using the ISO-8859-15 character encoding. This conversion may result in incorrect string output if the original encoding was not ISO-8859-15, but it will be a valid UTF-8 string. Closes #545
This commit is contained in:
@@ -138,25 +138,76 @@ class NormalizerFormatter implements FormatterInterface
|
||||
return $data;
|
||||
}
|
||||
|
||||
/**
 * Return the JSON representation of a value
 *
 * Encoding is delegated to jsonEncode(). When errors are not ignored and
 * the encoder reports failure (a strict false result), the failure is
 * handed to handleJsonError() together with the json_last_error() code so
 * that it can either recover or raise an exception.
 *
 * @param  mixed             $data
 * @param  bool              $ignoreErrors
 * @return string            NOTE(review): with $ignoreErrors the encoder
 *                           result is returned unchecked, so it is
 *                           presumably false on failure - confirm
 * @throws \RuntimeException if encoding fails and errors are not ignored
 */
protected function toJson($data, $ignoreErrors = false)
{
    // suppress json_encode errors since it's twitchy with some inputs
    if ($ignoreErrors) {
        return @$this->jsonEncode($data);
    }

    $json = $this->jsonEncode($data);

    if ($json === false) {
        // Give the error handler a chance to recover before giving up;
        // throwing here directly would make recovery unreachable.
        $json = $this->handleJsonError(json_last_error(), $data);
    }

    return $json;
}
|
||||
|
||||
/**
 * Encode a value as JSON, leaving slashes and unicode characters unescaped
 * when the running PHP version is at least 5.4.0.
 *
 * @param  mixed        $data value to encode
 * @return string|false JSON encoded data, or whatever json_encode()
 *                      reports on failure (false per the PHP manual)
 */
private function jsonEncode($data)
{
    // Older runtimes get a plain json_encode() call without any flags.
    if (version_compare(PHP_VERSION, '5.4.0', '<')) {
        return json_encode($data);
    }

    $flags = JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE;

    return json_encode($data, $flags);
}
|
||||
|
||||
/**
|
||||
* Handle a json_encode failure.
|
||||
*
|
||||
* If the failure is due to invalid string encoding, try to clean the
|
||||
* input and encode again. If the second encoding attempt fails, the
|
||||
* initial error is not encoding related or the input can't be cleaned then
|
||||
* raise a descriptive exception.
|
||||
*
|
||||
* @param int $code return code of json_last_error function
|
||||
* @param mixed $data data that was meant to be encoded
|
||||
* @return string JSON encoded data after error correction
|
||||
* @throws \RuntimeException if failure can't be corrected
|
||||
*/
|
||||
private function handleJsonError($code, $data)
|
||||
{
|
||||
if ($code !== JSON_ERROR_UTF8) {
|
||||
$this->throwEncodeError($code, $data);
|
||||
}
|
||||
|
||||
if (is_string($data)) {
|
||||
$this->detectAndCleanUtf8($data);
|
||||
|
||||
} elseif (is_array($data)) {
|
||||
array_walk_recursive($data, array($this, 'detectAndCleanUtf8'));
|
||||
|
||||
} else {
|
||||
$this->throwEncodeError($code, $data);
|
||||
}
|
||||
|
||||
$json = $this->jsonEncode($data);
|
||||
|
||||
if ($json === false) {
|
||||
$json = $this->throwEncodeError(json_last_error(), $data);
|
||||
}
|
||||
|
||||
return $json;
|
||||
@@ -190,4 +241,36 @@ class NormalizerFormatter implements FormatterInterface
|
||||
|
||||
throw new \RuntimeException('JSON encoding failed: '.$msg.'. Encoding: '.var_export($data, true));
|
||||
}
|
||||
|
||||
/**
 * Rewrite a string that is not valid UTF-8 into valid UTF-8, in place.
 *
 * Non-string values and strings that already pass the UTF-8 check are
 * left untouched. Otherwise every run of bytes in the 0x80-0xFF range is
 * passed through utf8_encode(), and afterwards the eight characters that
 * differ between ISO-8859-1 and ISO-8859-15 are swapped for their
 * ISO-8859-15 meaning. The result may be wrong if the input was in some
 * other encoding, but no encoding detection is attempted.
 *
 * The argument is taken by reference so the method can serve as an
 * array_walk_recursive callback.
 *
 * @param mixed &$data Input to check and convert if needed
 * @access private
 */
public function detectAndCleanUtf8(&$data)
{
    if (!is_string($data)) {
        return;
    }

    // An empty pattern with the "u" modifier only matches valid UTF-8.
    if (preg_match('//u', $data)) {
        return;
    }

    $reencoded = preg_replace_callback(
        '/[\x80-\xFF]+/',
        function ($highBytes) {
            return utf8_encode($highBytes[0]);
        },
        $data
    );

    // Characters as produced above => the character intended by ISO-8859-15.
    $latin9Fixes = array(
        '¤' => '€',
        '¦' => 'Š',
        '¨' => 'š',
        '´' => 'Ž',
        '¸' => 'ž',
        '¼' => 'Œ',
        '½' => 'œ',
        '¾' => 'Ÿ',
    );

    $data = str_replace(array_keys($latin9Fixes), array_values($latin9Fixes), $reencoded);
}
|
||||
}
|
||||
|
@@ -15,6 +15,12 @@ use Monolog\Logger;
|
||||
|
||||
class LogstashFormatterTest extends \PHPUnit_Framework_TestCase
|
||||
{
|
||||
/**
 * Switch PHPUnit's warning handling flag back on after each test.
 *
 * testFormatWithLatin9Data() turns the flag off on PHP <5.5.0, so it is
 * reset here unconditionally.
 */
public function tearDown()
{
    \PHPUnit_Framework_Error_Warning::$enabled = true;

    return parent::tearDown();
}
|
||||
|
||||
/**
|
||||
* @covers Monolog\Formatter\LogstashFormatter::format
|
||||
*/
|
||||
@@ -286,4 +292,41 @@ class LogstashFormatterTest extends \PHPUnit_Framework_TestCase
|
||||
$this->assertArrayHasKey('type', $message);
|
||||
$this->assertEquals('app', $message['type']);
|
||||
}
|
||||
|
||||
/**
 * Format a record whose "extra" data holds bytes that are not valid UTF-8
 * and check the decoded output, with a separate expectation for PHP <5.5.0.
 */
public function testFormatWithLatin9Data()
{
    $legacyPhp = version_compare(PHP_VERSION, '5.5.0', '<');
    if ($legacyPhp) {
        // Ignore the warning that will be emitted by PHP <5.5.0
        \PHPUnit_Framework_Error_Warning::$enabled = false;
    }

    $logRecord = array(
        'level' => Logger::ERROR,
        'level_name' => 'ERROR',
        'channel' => '¯\_(ツ)_/¯',
        'context' => array(),
        'datetime' => new \DateTime("@0"),
        'extra' => array(
            'user_agent' => "\xD6WN; FBCR/OrangeEspa\xF1a; Vers\xE3o/4.0; F\xE4rist",
        ),
        'message' => 'log',
    );

    $logstash = new LogstashFormatter('test', 'hostname');
    $decoded = json_decode($logstash->format($logRecord), true);
    $fields = $decoded['@fields'];

    $this->assertEquals("1970-01-01T00:00:00.000000+00:00", $decoded['@timestamp']);
    $this->assertEquals('log', $decoded['@message']);
    $this->assertEquals('¯\_(ツ)_/¯', $fields['channel']);
    $this->assertContains('¯\_(ツ)_/¯', $decoded['@tags']);
    $this->assertEquals(Logger::ERROR, $fields['level']);
    $this->assertEquals('test', $decoded['@type']);
    $this->assertEquals('hostname', $decoded['@source']);

    if ($legacyPhp) {
        // PHP <5.5 does not return false for an element encoding failure,
        // instead it emits a warning (possibly) and nulls the value.
        $this->assertEquals(null, $decoded['@fields']['user_agent']);
    } else {
        $this->assertEquals('ÖWN; FBCR/OrangeEspaña; Versão/4.0; Färist', $decoded['@fields']['user_agent']);
    }
}
|
||||
}
|
||||
|
@@ -16,6 +16,12 @@ namespace Monolog\Formatter;
|
||||
*/
|
||||
class NormalizerFormatterTest extends \PHPUnit_Framework_TestCase
|
||||
{
|
||||
/**
 * Switch PHPUnit's warning handling flag back on after each test.
 *
 * Several tests in this class turn the flag off on PHP <5.5.0, so it is
 * reset here unconditionally.
 */
public function tearDown()
{
    \PHPUnit_Framework_Error_Warning::$enabled = true;

    return parent::tearDown();
}
|
||||
|
||||
public function testFormat()
|
||||
{
|
||||
$formatter = new NormalizerFormatter('Y-m-d');
|
||||
@@ -188,17 +194,100 @@ class NormalizerFormatterTest extends \PHPUnit_Framework_TestCase
|
||||
*/
|
||||
// Encodes an object carrying an invalid UTF-8 sequence through toJson().
// Objects are not cleaned by the recovery path, so encoding is expected to
// fail. NOTE(review): the expected-exception declaration presumably sits in
// the docblock above, which is not fully visible here - confirm.
public function testThrowsOnInvalidEncoding()
{
    if (version_compare(PHP_VERSION, '5.5.0', '<')) {
        // Ignore the warning that will be emitted by PHP <5.5.0
        \PHPUnit_Framework_Error_Warning::$enabled = false;
    }
    $formatter = new NormalizerFormatter();
    $reflMethod = new \ReflectionMethod($formatter, 'toJson');
    $reflMethod->setAccessible(true);

    // send an invalid unicode sequence as a object that can't be cleaned
    $record = new \stdClass;
    $record->message = "\xB1\x31";
    $res = $reflMethod->invoke($formatter, $record);
    if (PHP_VERSION_ID < 50500 && $res === '{"message":null}') {
        // Older PHP nulls the value instead of failing, so raise the
        // exception by hand to keep the outcome the same on every version.
        throw new \RuntimeException('PHP 5.3/5.4 throw a warning and null the value instead of returning false entirely');
    }
}
|
||||
|
||||
/**
 * Pass an array holding bytes that are invalid as UTF-8 to toJson() and
 * check the resulting JSON, with a separate expectation for PHP <5.5.0.
 */
public function testConvertsInvalidEncodingAsLatin9()
{
    $legacyPhp = version_compare(PHP_VERSION, '5.5.0', '<');
    if ($legacyPhp) {
        // Ignore the warning that will be emitted by PHP <5.5.0
        \PHPUnit_Framework_Error_Warning::$enabled = false;
    }

    $normalizer = new NormalizerFormatter();
    $toJson = new \ReflectionMethod($normalizer, 'toJson');
    $toJson->setAccessible(true);

    $payload = array('message' => "\xA4\xA6\xA8\xB4\xB8\xBC\xBD\xBE");
    $encoded = $toJson->invoke($normalizer, $payload);

    if ($legacyPhp) {
        // PHP <5.5 does not return false for an element encoding failure,
        // instead it emits a warning (possibly) and nulls the value.
        $this->assertSame('{"message":null}', $encoded);

        return;
    }

    $this->assertSame('{"message":"€ŠšŽžŒœŸ"}', $encoded);
}
|
||||
|
||||
/**
 * Run detectAndCleanUtf8() on a value and compare the in-place result
 * with the expected value using a strict comparison.
 *
 * @param mixed $in     Input
 * @param mixed $expect Expected output
 * @covers Monolog\Formatter\NormalizerFormatter::detectAndCleanUtf8
 * @dataProvider providesDetectAndCleanUtf8
 */
public function testDetectAndCleanUtf8($in, $expect)
{
    $normalizer = new NormalizerFormatter();

    // The method works by reference, so $in holds the outcome afterwards.
    $normalizer->detectAndCleanUtf8($in);

    $this->assertSame($expect, $in);
}
|
||||
|
||||
/**
 * Data sets for testDetectAndCleanUtf8(): each entry is a pair of
 * (input, expected value after cleaning), keyed by a descriptive name.
 *
 * @return array
 */
public function providesDetectAndCleanUtf8()
{
    $sharedObject = new \stdClass;

    $cases = array();

    // Values that are not strings
    $cases['null'] = array(null, null);
    $cases['int'] = array(123, 123);
    $cases['float'] = array(123.45, 123.45);
    $cases['bool false'] = array(false, false);
    $cases['bool true'] = array(true, true);

    // Strings
    $cases['ascii string'] = array('abcdef', 'abcdef');
    $cases['latin9 string'] = array("\xB1\x31\xA4\xA6\xA8\xB4\xB8\xBC\xBD\xBE\xFF", '±1€ŠšŽžŒœŸÿ');
    $cases['unicode string'] = array('¤¦¨´¸¼½¾€ŠšŽžŒœŸ', '¤¦¨´¸¼½¾€ŠšŽžŒœŸ');

    // Compound values
    $cases['empty array'] = array(array(), array());
    $cases['array'] = array(array('abcdef'), array('abcdef'));
    $cases['object'] = array($sharedObject, $sharedObject);

    return $cases;
}
|
||||
|
||||
/**
 * Call handleJsonError() with an error code and expect a RuntimeException
 * carrying the given message.
 *
 * @param int    $code
 * @param string $msg
 * @dataProvider providesHandleJsonErrorFailure
 */
public function testHandleJsonErrorFailure($code, $msg)
{
    $normalizer = new NormalizerFormatter();

    // Reflection is used to reach the method from outside the class.
    $handler = new \ReflectionMethod($normalizer, 'handleJsonError');
    $handler->setAccessible(true);

    $this->setExpectedException('RuntimeException', $msg);

    $handler->invoke($normalizer, $code, 'faked');
}
|
||||
|
||||
/**
 * Data sets for testHandleJsonErrorFailure(): each entry is a pair of
 * (error code, expected exception message), keyed by a short name.
 *
 * @return array
 */
public function providesHandleJsonErrorFailure()
{
    $cases = array();

    $cases['depth'] = array(JSON_ERROR_DEPTH, 'Maximum stack depth exceeded');
    $cases['state'] = array(JSON_ERROR_STATE_MISMATCH, 'Underflow or the modes mismatch');
    $cases['ctrl'] = array(JSON_ERROR_CTRL_CHAR, 'Unexpected control character found');

    // A code that is not one of the JSON_ERROR_* constants listed above
    $cases['default'] = array(-1, 'Unknown error');

    return $cases;
}
|
||||
|
||||
public function testExceptionTraceWithArgs()
|
||||
{
|
||||
if (defined('HHVM_VERSION')) {
|
||||
@@ -284,4 +373,4 @@ class TestToStringError
|
||||
{
|
||||
throw new \RuntimeException('Could not convert to string');
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user