1
0
mirror of https://github.com/Kovah/LinkAce.git synced 2025-04-21 23:42:10 +02:00

Adjust html meta helper to accept content-type encoding (#238)

This commit is contained in:
Kovah 2021-02-28 15:34:27 +01:00
parent a217d21a34
commit 72e200b4a5
No known key found for this signature in database
GPG Key ID: AAAA031BA9830D7B
2 changed files with 35 additions and 4 deletions

View File

@ -20,6 +20,9 @@ class HtmlMeta
/** @var bool */
protected static $flashAlerts;
/** @var string|null */
protected static $charset;
/**
* Get the title and description of a URL.
*
@ -79,7 +82,6 @@ class HtmlMeta
}
Log::warning($url . ': ' . $e->getMessage());
return null;
} catch (RequestException $e) {
if (self::$flashAlerts) {
@ -87,7 +89,6 @@ class HtmlMeta
}
Log::warning($url . ': ' . $e->getMessage());
return null;
}
@ -95,6 +96,7 @@ class HtmlMeta
return null;
}
self::$charset = explode('charset=', $response->header('content-type'))[1] ?? null;
return $response->body();
}
@ -125,7 +127,7 @@ class HtmlMeta
?? $metaTags['twitter:description']
?? self::$fallback['description'];
if (isset($metaTags['charset']) && strtolower($metaTags['charset']) !== 'utf-8') {
if ($metaTags['charset'] && $metaTags['charset'] !== 'utf-8') {
try {
$title = iconv($metaTags['charset'], 'UTF-8', $title) ?: null;
$description = iconv($metaTags['charset'], 'UTF-8', $description) ?: null;
@ -164,7 +166,9 @@ class HtmlMeta
$pattern = '/<[\s]*meta[\s]*(charset)="?([^>"]*)"?[\s]*>/i';
if (preg_match($pattern, $html, $out)) {
$tags['charset'] = $out[2];
$tags['charset'] = strtolower($out[2]);
} else {
$tags['charset'] = self::$charset ? strtolower(self::$charset) : null;
}
return $tags;

View File

@ -233,4 +233,31 @@ class HtmlMetaHelperTest extends TestCase
$this->assertEquals('duckduckgo.com', $result['title']);
$this->assertTrue($result['success']);
}
/**
 * Verify that a charset announced only in the Content-Type response header
 * is honoured when the meta data of a page is parsed.
 *
 * The faked response body contains no charset meta tag. Its description tag
 * is assembled from raw bytes via hex2bin() and reads
 * '<meta name="description" content="Qualität">', with the 'ä' stored as the
 * single ISO-8859-1 byte 0xE4. The helper is expected to hand back the
 * description converted to UTF-8, i.e. 'Qualität'.
 */
public function testMetaEncodingWithContentType(): void
{
    // Raw bytes of the description meta tag; 0xE4 is 'ä' in ISO-8859-1.
    $descriptionTag = hex2bin(
        '3c6d657461206e616d653d226465736372697074696f6e2220636f6e74656e743d225175616c6974e474223e'
    );
    $latin1Document = implode('', ['<!DOCTYPE html><head>', $descriptionTag, '</head></html>']);

    // Every outgoing request is answered with the ISO-8859-1 document; the
    // encoding is declared in the response header only.
    $fakedResponse = Http::response($latin1Document, 200, [
        'Content-Type' => 'text/html; charset=iso-8859-1'
    ]);
    Http::fake(['*' => $fakedResponse]);

    $meta = HtmlMeta::getFromUrl('https://encoding-test.com/');

    $this->assertArrayHasKey('description', $meta);
    $this->assertEquals('Qualität', $meta['description']);
}
}