1
0
mirror of https://github.com/Seldaek/monolog.git synced 2025-07-30 09:50:26 +02:00

Attempt to recover from json encoding errors

Detect and attempt to recover from json_encode errors triggered by
strings containing invalid UTF-8 sequences. Recovery will only be
attempted when encoding strings or arrays. If recovery fails then
a RuntimeException will be thrown.

The recovery process will convert invalid UTF-8 codepoints as though the
input string was encoded using the ISO-8859-15 character encoding. This
conversion may result in incorrect string output if the original
encoding was not ISO-8859-15, but it will be a valid UTF-8 string.

Closes #545
This commit is contained in:
Bryan Davis
2015-11-11 20:33:44 -07:00
committed by Bryan Davis
parent f86e643a58
commit 6f9e221bd6
3 changed files with 229 additions and 14 deletions

View File

@@ -138,25 +138,76 @@ class NormalizerFormatter implements FormatterInterface
return $data;
}
/**
* Return the JSON representation of a value
*
* @param mixed $data
* @param bool $ignoreErrors
* @return string
* @throws \RuntimeException if encoding fails and errors are not ignored
*/
protected function toJson($data, $ignoreErrors = false)
{
// suppress json_encode errors since it's twitchy with some inputs
if ($ignoreErrors) {
if (version_compare(PHP_VERSION, '5.4.0', '>=')) {
return @json_encode($data, JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE);
}
return @json_encode($data);
return @$this->jsonEncode($data);
}
if (version_compare(PHP_VERSION, '5.4.0', '>=')) {
$json = json_encode($data, JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE);
} else {
$json = json_encode($data);
}
$json = $this->jsonEncode($data);
if ($json === false) {
$this->throwEncodeError(json_last_error(), $data);
$json = $this->handleJsonError(json_last_error(), $data);
}
return $json;
}
/**
* @param mixed $data
* @return string JSON encoded data or null on failure
*/
private function jsonEncode($data)
{
if (version_compare(PHP_VERSION, '5.4.0', '>=')) {
return json_encode($data, JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE);
}
return json_encode($data);
}
/**
* Handle a json_encode failure.
*
* If the failure is due to invalid string encoding, try to clean the
* input and encode again. If the second encoding iattempt fails, the
* inital error is not encoding related or the input can't be cleaned then
* raise a descriptive exception.
*
* @param int $code return code of json_last_error function
* @param mixed $data data that was meant to be encoded
* @return string JSON encoded data after error correction
* @throws \RuntimeException if failure can't be corrected
*/
private function handleJsonError($code, $data)
{
if ($code !== JSON_ERROR_UTF8) {
$this->throwEncodeError($code, $data);
}
if (is_string($data)) {
$this->detectAndCleanUtf8($data);
} elseif (is_array($data)) {
array_walk_recursive($data, array($this, 'detectAndCleanUtf8'));
} else {
$this->throwEncodeError($code, $data);
}
$json = $this->jsonEncode($data);
if ($json === false) {
$json = $this->throwEncodeError(json_last_error(), $data);
}
return $json;
@@ -190,4 +241,36 @@ class NormalizerFormatter implements FormatterInterface
throw new \RuntimeException('JSON encoding failed: '.$msg.'. Encoding: '.var_export($data, true));
}
/**
* Detect invalid UTF-8 string characters and convert to valid UTF-8.
*
* Valid UTF-8 input will be left unmodified, but strings containing
* invalid UTF-8 codepoints will be reencoded as UTF-8 with an assumed
* original encoding of ISO-8859-15. This conversion may result in
* incorrect output if the actual encoding was not ISO-8859-15, but it
* will be clean UTF-8 output and will not rely on expensive and fragile
* detection algorithms.
*
* Function converts the input in place in the passed variable so that it
* can be used as a callback for array_walk_recursive.
*
* @param mixed &$data Input to check and convert if needed
* @access private
*/
public function detectAndCleanUtf8(&$data)
{
if (is_string($data) && !preg_match('//u', $data)) {
$data = preg_replace_callback(
'/[\x80-\xFF]+/',
function ($m) { return utf8_encode($m[0]); },
$data
);
$data = str_replace(
array('¤', '¦', '¨', '´', '¸', '¼', '½', '¾'),
array('€', 'Š', 'š', 'Ž', 'ž', 'Œ', 'œ', 'Ÿ'),
$data
);
}
}
}

View File

@@ -15,6 +15,12 @@ use Monolog\Logger;
class LogstashFormatterTest extends \PHPUnit_Framework_TestCase
{
public function tearDown()
{
\PHPUnit_Framework_Error_Warning::$enabled = true;
return parent::tearDown();
}
/**
* @covers Monolog\Formatter\LogstashFormatter::format
*/
@@ -286,4 +292,41 @@ class LogstashFormatterTest extends \PHPUnit_Framework_TestCase
$this->assertArrayHasKey('type', $message);
$this->assertEquals('app', $message['type']);
}
public function testFormatWithLatin9Data()
{
if (version_compare(PHP_VERSION, '5.5.0', '<')) {
// Ignore the warning that will be emitted by PHP <5.5.0
\PHPUnit_Framework_Error_Warning::$enabled = false;
}
$formatter = new LogstashFormatter('test', 'hostname');
$record = array(
'level' => Logger::ERROR,
'level_name' => 'ERROR',
'channel' => '¯\_(ツ)_/¯',
'context' => array(),
'datetime' => new \DateTime("@0"),
'extra' => array(
'user_agent' => "\xD6WN; FBCR/OrangeEspa\xF1a; Vers\xE3o/4.0; F\xE4rist",
),
'message' => 'log',
);
$message = json_decode($formatter->format($record), true);
$this->assertEquals("1970-01-01T00:00:00.000000+00:00", $message['@timestamp']);
$this->assertEquals('log', $message['@message']);
$this->assertEquals('¯\_(ツ)_/¯', $message['@fields']['channel']);
$this->assertContains('¯\_(ツ)_/¯', $message['@tags']);
$this->assertEquals(Logger::ERROR, $message['@fields']['level']);
$this->assertEquals('test', $message['@type']);
$this->assertEquals('hostname', $message['@source']);
if (version_compare(PHP_VERSION, '5.5.0', '>=')) {
$this->assertEquals('ÖWN; FBCR/OrangeEspaña; Versão/4.0; Färist', $message['@fields']['user_agent']);
} else {
// PHP <5.5 does not return false for an element encoding failure,
// instead it emits a warning (possibly) and nulls the value.
$this->assertEquals(null, $message['@fields']['user_agent']);
}
}
}

View File

@@ -16,6 +16,12 @@ namespace Monolog\Formatter;
*/
class NormalizerFormatterTest extends \PHPUnit_Framework_TestCase
{
public function tearDown()
{
\PHPUnit_Framework_Error_Warning::$enabled = true;
return parent::tearDown();
}
public function testFormat()
{
$formatter = new NormalizerFormatter('Y-m-d');
@@ -188,17 +194,100 @@ class NormalizerFormatterTest extends \PHPUnit_Framework_TestCase
*/
public function testThrowsOnInvalidEncoding()
{
if (version_compare(PHP_VERSION, '5.5.0', '<')) {
// Ignore the warning that will be emitted by PHP <5.5.0
\PHPUnit_Framework_Error_Warning::$enabled = false;
}
$formatter = new NormalizerFormatter();
$reflMethod = new \ReflectionMethod($formatter, 'toJson');
$reflMethod->setAccessible(true);
// send an invalid unicode sequence
$res = $reflMethod->invoke($formatter, array('message' => "\xB1\x31"));
// send an invalid unicode sequence as a object that can't be cleaned
$record = new \stdClass;
$record->message = "\xB1\x31";
$res = $reflMethod->invoke($formatter, $record);
if (PHP_VERSION_ID < 50500 && $res === '{"message":null}') {
throw new \RuntimeException('PHP 5.3/5.4 throw a warning and null the value instead of returning false entirely');
}
}
public function testConvertsInvalidEncodingAsLatin9()
{
if (version_compare(PHP_VERSION, '5.5.0', '<')) {
// Ignore the warning that will be emitted by PHP <5.5.0
\PHPUnit_Framework_Error_Warning::$enabled = false;
}
$formatter = new NormalizerFormatter();
$reflMethod = new \ReflectionMethod($formatter, 'toJson');
$reflMethod->setAccessible(true);
$res = $reflMethod->invoke($formatter, array('message' => "\xA4\xA6\xA8\xB4\xB8\xBC\xBD\xBE"));
if (version_compare(PHP_VERSION, '5.5.0', '>=')) {
$this->assertSame('{"message":"€ŠšŽžŒœŸ"}', $res);
} else {
// PHP <5.5 does not return false for an element encoding failure,
// instead it emits a warning (possibly) and nulls the value.
$this->assertSame('{"message":null}', $res);
}
}
/**
* @param mixed $in Input
* @param mixed $expect Expected output
* @covers Monolog\Formatter\NormalizerFormatter::detectAndCleanUtf8
* @dataProvider providesDetectAndCleanUtf8
*/
public function testDetectAndCleanUtf8($in, $expect)
{
$formatter = new NormalizerFormatter();
$formatter->detectAndCleanUtf8($in);
$this->assertSame($expect, $in);
}
public function providesDetectAndCleanUtf8()
{
$obj = new \stdClass;
return array(
'null' => array(null, null),
'int' => array(123, 123),
'float' => array(123.45, 123.45),
'bool false' => array(false, false),
'bool true' => array(true, true),
'ascii string' => array('abcdef', 'abcdef'),
'latin9 string' => array("\xB1\x31\xA4\xA6\xA8\xB4\xB8\xBC\xBD\xBE\xFF", '±1€ŠšŽžŒœŸÿ'),
'unicode string' => array('¤¦¨´¸¼½¾€ŠšŽžŒœŸ', '¤¦¨´¸¼½¾€ŠšŽžŒœŸ'),
'empty array' => array(array(), array()),
'array' => array(array('abcdef'), array('abcdef')),
'object' => array($obj, $obj),
);
}
/**
* @param int $code
* @param string $msg
* @dataProvider providesHandleJsonErrorFailure
*/
public function testHandleJsonErrorFailure($code, $msg)
{
$formatter = new NormalizerFormatter();
$reflMethod = new \ReflectionMethod($formatter, 'handleJsonError');
$reflMethod->setAccessible(true);
$this->setExpectedException('RuntimeException', $msg);
$reflMethod->invoke($formatter, $code, 'faked');
}
public function providesHandleJsonErrorFailure()
{
return array(
'depth' => array(JSON_ERROR_DEPTH, 'Maximum stack depth exceeded'),
'state' => array(JSON_ERROR_STATE_MISMATCH, 'Underflow or the modes mismatch'),
'ctrl' => array(JSON_ERROR_CTRL_CHAR, 'Unexpected control character found'),
'default' => array(-1, 'Unknown error'),
);
}
public function testExceptionTraceWithArgs()
{
if (defined('HHVM_VERSION')) {
@@ -284,4 +373,4 @@ class TestToStringError
{
throw new \RuntimeException('Could not convert to string');
}
}
}