1
0
mirror of https://github.com/Kovah/LinkAce.git synced 2025-04-19 06:27:00 +02:00

Replace parts of HTML Meta helper with a new laravel-html-meta package (#238)

This commit is contained in:
Kovah 2021-03-07 14:24:08 +01:00
parent 2a25af3e3f
commit 6f8b41aa39
No known key found for this signature in database
GPG Key ID: AAAA031BA9830D7B
6 changed files with 123 additions and 163 deletions

View File

@ -2,27 +2,15 @@
namespace App\Helper;
use GuzzleHttp\Exception\RequestException;
use Illuminate\Http\Client\ConnectionException;
use Illuminate\Support\Facades\Http;
use Illuminate\Support\Facades\Log;
use Kovah\HtmlMeta\Exceptions\InvalidUrlException;
use Kovah\HtmlMeta\Exceptions\UnreachableUrlException;
/**
* Class HtmlMeta
*
* @package App\Helper
*/
class HtmlMeta
{
/** @var array */
protected static $fallback;
/** @var bool */
protected static $flashAlerts;
/** @var string|null */
protected static $charset;
/**
* Get the title and description of a URL.
*
@ -39,157 +27,58 @@ class HtmlMeta
*/
public static function getFromUrl(string $url, bool $flashAlerts = false): array
{
self::$flashAlerts = $flashAlerts;
self::buildFallback($url);
if (!filter_var($url, FILTER_VALIDATE_URL)) {
return [
'success' => false,
'title' => $url,
'description' => null,
];
}
self::$fallback = [
'success' => false,
'title' => parse_url($url, PHP_URL_HOST),
'description' => null,
];
$html = self::getHtmlContent($url);
if ($html === null) {
try {
$meta = \Kovah\HtmlMeta\Facades\HtmlMeta::forUrl($url);
} catch (InvalidUrlException $e) {
Log::warning($url . ': ' . $e->getMessage());
if ($flashAlerts) {
flash(trans('link.added_connection_error'), 'warning');
}
return self::$fallback;
} catch (UnreachableUrlException $e) {
Log::warning($url . ': ' . $e->getMessage());
if ($flashAlerts) {
flash(trans('link.added_request_error'), 'warning');
}
return self::$fallback;
}
return self::buildHtmlMeta($html);
return self::buildLinkMeta($meta);
}
/**
* Try to get the HTML content of an URL.
* If a connection or response error occurs, null is returned, otherwise
* the HTML as a string.
* Build a response array containing the link meta including a success flag.
*
* @param string $url
* @return string|null
*/
protected static function getHtmlContent(string $url): ?string
{
try {
$response = Http::timeout(5)->get($url);
} catch (ConnectionException $e) {
if (self::$flashAlerts) {
flash(trans('link.added_connection_error'), 'warning');
}
Log::warning($url . ': ' . $e->getMessage());
return null;
} catch (RequestException $e) {
if (self::$flashAlerts) {
flash(trans('link.added_request_error'), 'warning');
}
Log::warning($url . ': ' . $e->getMessage());
return null;
}
if (!$response->successful()) {
return null;
}
self::$charset = explode('charset=', $response->header('content-type'))[1] ?? null;
return $response->body();
}
/**
* Returns an array containing the title and description parsed from the
* given HTML.
*
* If a charset meta tag was found and it does not contain UTF-8 as a value,
* the method tries to convert both values from the given charset into UTF-8.
* If it fails, it returns null because we most likely can't generate any
* useful information here.
*
* If no charset is available, the method will check if the title is encoded
* as UTF-8. If it does not pass the check, title and description will be set
* to null as we will most likely not be able to get any correctly encoded
* information from the strings without proper encoding information.
*
* @param string $html
* @param array $metaTags
* @return array
*/
protected static function buildHtmlMeta(string $html): array
protected static function buildLinkMeta(array $metaTags): array
{
$title = self::parseTitle($html);
$metaTags = self::getMetaTags($html);
$description = $metaTags['description']
$metaTags['description'] = $metaTags['description']
?? $metaTags['og:description']
?? $metaTags['twitter:description']
?? self::$fallback['description'];
if ($metaTags['charset'] && $metaTags['charset'] !== 'utf-8') {
try {
$title = iconv($metaTags['charset'], 'UTF-8', $title) ?: null;
$description = iconv($metaTags['charset'], 'UTF-8', $description) ?: null;
} catch (\ErrorException $e) {
$title = null;
$description = null;
}
} elseif (mb_detect_encoding($title, 'UTF-8', true) === false) {
$title = null;
$description = null;
}
?? null;
return [
'success' => true,
'title' => $title ?? self::$fallback['title'],
'description' => $description,
'title' => $metaTags['title'] ?? self::$fallback['title'],
'description' => $metaTags['description'],
];
}
/**
* Parses the meta tags from HTML by using a specific regex.
* Returns an array of all found meta tags or an empty array if no tags were found.
* The fallback is used in case of errors while trying to get the link meta.
*
* @param string $html
* @return array
* @param string $url
*/
protected static function getMetaTags(string $html): array
protected static function buildFallback(string $url): void
{
$tags = [];
$pattern = '/<[\s]*meta[\s]*(name|property)="?([^>"]*)"?[\s]*content="?([^>"]*)"?[\s]*[\/]?[\s]*>/i';
if (preg_match_all($pattern, $html, $out)) {
$tags = array_combine($out[2], $out[3]);
}
$pattern = '/<[\s]*meta[\s]*(charset)="?([^>"]*)"?[\s]*>/i';
if (preg_match($pattern, $html, $out)) {
$tags['charset'] = strtolower($out[2]);
} else {
$tags['charset'] = self::$charset ? strtolower(self::$charset) : null;
}
return $tags;
}
/**
* Try to parse the title tag from the HTML by using regex.
* If a title tag was found, excessive whitespace and newlines are removed from the string.
*
* @param $html
* @return string|null
*/
protected static function parseTitle($html): ?string
{
$res = preg_match("/<title>(.*)<\/title>/siU", $html, $titleMatches);
if ($res) {
$title = preg_replace('/\s+/', ' ', $titleMatches[1]);
$title = trim($title);
}
return $title ?? null;
self::$fallback = [
'success' => false,
'title' => parse_url($url, PHP_URL_HOST) ?? $url,
'description' => false,
];
}
}

View File

@ -5,6 +5,7 @@
"type": "project",
"require": {
"php": "^7.3 | ^8.0",
"kovah/laravel-html-meta": "^1.0",
"composer/semver": "^1.5",
"doctrine/dbal": "^2.10.2",
"fideloper/proxy": "^4.4",
@ -35,12 +36,6 @@
"roave/security-advisories": "dev-latest",
"squizlabs/php_codesniffer": "^3.5"
},
"repositories": [
{
"type": "composer",
"url": "https://satis.laravel-enlightn.com"
}
],
"autoload": {
"files": [
"app/Helper/functions.php"

87
composer.lock generated
View File

@ -4,7 +4,7 @@
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
"This file is @generated automatically"
],
"content-hash": "9cf68002add091cb3c5cb683a1fd652f",
"content-hash": "970e7c41642a1817c593f6f2a9423513",
"packages": [
{
"name": "asm89/stack-cors",
@ -1368,16 +1368,16 @@
},
{
"name": "guzzlehttp/promises",
"version": "1.4.0",
"version": "1.4.1",
"source": {
"type": "git",
"url": "https://github.com/guzzle/promises.git",
"reference": "60d379c243457e073cff02bc323a2a86cb355631"
"reference": "8e7d04f1f6450fef59366c399cfad4b9383aa30d"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/guzzle/promises/zipball/60d379c243457e073cff02bc323a2a86cb355631",
"reference": "60d379c243457e073cff02bc323a2a86cb355631",
"url": "https://api.github.com/repos/guzzle/promises/zipball/8e7d04f1f6450fef59366c399cfad4b9383aa30d",
"reference": "8e7d04f1f6450fef59366c399cfad4b9383aa30d",
"shasum": ""
},
"require": {
@ -1417,9 +1417,9 @@
],
"support": {
"issues": "https://github.com/guzzle/promises/issues",
"source": "https://github.com/guzzle/promises/tree/1.4.0"
"source": "https://github.com/guzzle/promises/tree/1.4.1"
},
"time": "2020-09-30T07:37:28+00:00"
"time": "2021-03-07T09:25:29+00:00"
},
{
"name": "guzzlehttp/psr7",
@ -1663,6 +1663,79 @@
},
"time": "2016-11-07T19:29:14+00:00"
},
{
"name": "kovah/laravel-html-meta",
"version": "v1.0.0",
"source": {
"type": "git",
"url": "https://github.com/Kovah/laravel-html-meta.git",
"reference": "d6690f3520d81479927239d498d62455626c5448"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/Kovah/laravel-html-meta/zipball/d6690f3520d81479927239d498d62455626c5448",
"reference": "d6690f3520d81479927239d498d62455626c5448",
"shasum": ""
},
"require": {
"guzzlehttp/guzzle": "^7.2",
"illuminate/http": "^7.0 | ^8.0",
"illuminate/support": "^7.0 | ^8.0",
"php": "^7.3 | ^8.0"
},
"require-dev": {
"mockery/mockery": "^1.4.2",
"orchestra/testbench": "^6.13",
"phpunit/phpunit": "^9.0"
},
"type": "library",
"extra": {
"laravel": {
"providers": [
"Kovah\\HtmlMeta\\HtmlMetaServiceProvider"
],
"aliases": {
"HtmlMeta": "Kovah\\HtmlMeta\\Facades\\HtmlMeta"
}
}
},
"autoload": {
"psr-4": {
"Kovah\\HtmlMeta\\": "src"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Kevin Woblick",
"email": "mail@kovah.de",
"homepage": "https://kovah.de",
"role": "Developer"
}
],
"description": "A Laravel package to parse meta information from an URL.",
"homepage": "https://github.com/kovah/laravel-html-meta",
"keywords": [
"html",
"laravel",
"meta",
"parser"
],
"support": {
"issues": "https://github.com/Kovah/laravel-html-meta/issues",
"source": "https://github.com/Kovah/laravel-html-meta/tree/v1.0.0"
},
"funding": [
{
"url": "https://github.com/sponsors/kovah",
"type": "github"
}
],
"time": "2021-03-07T13:17:14+00:00"
},
{
"name": "laracasts/flash",
"version": "3.2",

5
config/html-meta.php Normal file
View File

@ -0,0 +1,5 @@
<?php
// Settings for the HTML meta lookup. The file name and the Kovah\HtmlMeta
// namespace suggest this is the config for the kovah/laravel-html-meta
// package — NOTE(review): confirm against the package's published config.
return [
// Timeout for fetching a URL. Presumably in seconds — TODO confirm the unit
// in the package documentation.
'timeout' => 10,
// Class used as the parser. Presumably the package resolves this class to
// extract the meta information; swap it to plug in a custom parser — verify.
'parser' => \Kovah\HtmlMeta\HtmlMetaParser::class,
];

View File

@ -49,8 +49,8 @@ return [
'check_enabled' => 'Check is enabled',
'added_successfully' => 'Link added successfully.',
'added_connection_error' => 'The Link was added but a connection error occured when trying to access the URL. Details can be found in the logs.',
'added_request_error' => 'The Link was added but an error occured when trying to request the URL, for example an invalid certificate. Details can be found in the logs.',
'added_connection_error' => 'The Link was added but a connection error occurred when trying to access the URL. Details can be found in the logs.',
'added_request_error' => 'The Link was added but an error occurred when trying to request the URL, for example an invalid certificate. Details can be found in the logs.',
'updated_successfully' => 'Link updated successfully.',
'deleted_successfully' => 'Link deleted successfully.',
'deletion_error' => 'Link could not be deleted.',

View File

@ -85,7 +85,7 @@ class HtmlMetaHelperTest extends TestCase
* Test the titleFromURL() helper function with an invalid URL
* Will return just the host of the given URL.
*/
public function testTitleFromURLwithoutProtocol(): void
public function testTitleFromUrlWithoutProtocol(): void
{
$url = 'duckduckgo.com/about-us';
@ -124,8 +124,7 @@ class HtmlMetaHelperTest extends TestCase
$flashMessage = session('flash_notification', collect())->first();
$this->assertEquals(
'The Link was added but an error occured when trying to request the URL, ' .
'for example an invalid certificate. Details can be found in the logs.',
'The Link was added but an error occurred when trying to request the URL, for example an invalid certificate. Details can be found in the logs.',
$flashMessage['message']
);
}
@ -154,8 +153,7 @@ class HtmlMetaHelperTest extends TestCase
$flashMessage = session('flash_notification', collect())->first();
$this->assertEquals(
'The Link was added but a connection error occured when trying to access the URL.' .
' Details can be found in the logs.',
'The Link was added but an error occurred when trying to request the URL, for example an invalid certificate. Details can be found in the logs.',
$flashMessage['message']
);
}