From 6a24e53d6ca4fbfb3115a8cb30a51283684f0f20 Mon Sep 17 00:00:00 2001 From: Dag Date: Fri, 30 Aug 2024 04:21:51 +0200 Subject: [PATCH] refactor (#4244) --- bridges/AO3Bridge.php | 32 +++++++++++--------- bridges/BMDSystemhausBlogBridge.php | 5 ++-- bridges/TwitchBridge.php | 22 +++++++------- docs/09_Technical_recommendations/index.md | 34 ++++++++++++---------- lib/FeedItem.php | 5 +++- lib/RssBridge.php | 22 ++------------ lib/contents.php | 15 ++++++++-- 7 files changed, 69 insertions(+), 66 deletions(-) diff --git a/bridges/AO3Bridge.php b/bridges/AO3Bridge.php index 4c09c28c..970ed414 100644 --- a/bridges/AO3Bridge.php +++ b/bridges/AO3Bridge.php @@ -68,12 +68,13 @@ class AO3Bridge extends BridgeAbstract */ private function collectList($url) { - $httpClient = RssBridge::getHttpClient(); $version = 'v0.0.1'; - $agent = ['useragent' => "rss-bridge $version (https://github.com/RSS-Bridge/rss-bridge)"]; + $headers = [ + "useragent: rss-bridge $version (https://github.com/RSS-Bridge/rss-bridge)" + ]; + $response = getContents($url, $headers); - $response = $httpClient->request($url, $agent); - $html = \str_get_html($response->getBody()); + $html = \str_get_html($response); $html = defaultLinkTo($html, self::URI); // Get list title. Will include page range + count in some cases @@ -128,14 +129,15 @@ class AO3Bridge extends BridgeAbstract case ('last'): // only way to get this is using the navigate page unfortunately $url .= '/navigate'; - $response = $httpClient->request($url, $agent); - $html = \str_get_html($response->getBody()); + $response = getContents($url, $headers); + $html = \str_get_html($response); $html = defaultLinkTo($html, self::URI); $url = $html->find('ol.index.group > li > a', -1)->href; break; } - $response = $httpClient->request($url, $agent); - $html = \str_get_html($response->getBody()); + $response = getContents($url, $headers); + + $html = \str_get_html($response); $html = defaultLinkTo($html, self::URI); // remove duplicate fic summary if ($ficsum = $html->find('#workskin > .preface > .summary', 0)) { @@ -159,16 +161,18 @@ class AO3Bridge extends BridgeAbstract */ private function collectWork($url) { - $httpClient = RssBridge::getHttpClient(); $version = 'v0.0.1'; - $agent = ['useragent' => "rss-bridge $version (https://github.com/RSS-Bridge/rss-bridge)"]; + $headers = [ + "useragent: rss-bridge $version (https://github.com/RSS-Bridge/rss-bridge)" + ]; + $response = getContents($url . '/navigate', $headers); - $response = $httpClient->request($url . '/navigate', $agent); - $html = \str_get_html($response->getBody()); + $html = \str_get_html($response); $html = defaultLinkTo($html, self::URI); - $response = $httpClient->request($url . '?view_full_work=true', $agent); - $workhtml = \str_get_html($response->getBody()); + $response = getContents($url . '?view_full_work=true', $headers); + + $workhtml = \str_get_html($response); $workhtml = defaultLinkTo($workhtml, self::URI); $this->title = $html->find('h2 a', 0)->plaintext; diff --git a/bridges/BMDSystemhausBlogBridge.php b/bridges/BMDSystemhausBlogBridge.php index 12f3ca5e..98fb2d63 100644 --- a/bridges/BMDSystemhausBlogBridge.php +++ b/bridges/BMDSystemhausBlogBridge.php @@ -54,7 +54,7 @@ class BMDSystemhausBlogBridge extends BridgeAbstract public function collectData() { // get website content - $html = getSimpleHTMLDOM($this->getURI()) or returnServerError('No contents received!'); + $html = getSimpleHTMLDOM($this->getURI()); // Convert relative links in HTML into absolute links $html = defaultLinkTo($html, self::URI); @@ -207,7 +207,8 @@ class BMDSystemhausBlogBridge extends BridgeAbstract //----------------------------------------------------- public function getURI() { - $lURI = $this->getURIbyCountry($this->getInput('country')); + $country = $this->getInput('country') ?? ''; + $lURI = $this->getURIbyCountry($country); return $lURI != '' ? $lURI : parent::getURI(); } diff --git a/bridges/TwitchBridge.php b/bridges/TwitchBridge.php index c273aaca..6605a973 100644 --- a/bridges/TwitchBridge.php +++ b/bridges/TwitchBridge.php @@ -196,23 +196,21 @@ EOD; // e.g. 01:53:27 private function formatTimestampTime($seconds) { - return sprintf( - '%02d:%02d:%02d', - floor($seconds / 3600), - ($seconds / 60) % 60, - $seconds % 60 - ); + $floor = floor($seconds / 3600); + $i = intval($seconds / 60) % 60; + $i1 = $seconds % 60; + + return sprintf('%02d:%02d:%02d', $floor, $i, $i1); } // e.g. 01h53m27s private function formatQueryTime($seconds) { - return sprintf( - '%02dh%02dm%02ds', - floor($seconds / 3600), - ($seconds / 60) % 60, - $seconds % 60 - ); + $floor = floor($seconds / 3600); + $i = intval($seconds / 60) % 60; + $i1 = $seconds % 60; + + return sprintf('%02dh%02dm%02ds', $floor, $i, $i1); } /** diff --git a/docs/09_Technical_recommendations/index.md b/docs/09_Technical_recommendations/index.md index a57f0bbd..c564418e 100644 --- a/docs/09_Technical_recommendations/index.md +++ b/docs/09_Technical_recommendations/index.md @@ -1,28 +1,32 @@ ## General recommendations -* Use [HTTPS](https://en.wikipedia.org/wiki/HTTPS) (`https://...`) over [HTTP](https://en.wikipedia.org/wiki/HTTPS) (`http://...`) whenever possible - ## Test a site before building a bridge -Some sites make use of anti-bot mechanisms (e.g.: by using JavaScript) in which case they work fine in regular browsers, but not in the PHP environment. To check if a site works with RSS-Bridge, create a new bridge using the [template](../05_Bridge_API/02_BridgeAbstract.md#template) and load a valid URL (not the base URL!). +Some sites make use of anti-bot mechanisms (e.g.: by using JavaScript) in which case they work fine in regular browsers, +but not in the PHP environment. + +To check if a site works with RSS-Bridge, create a new bridge using the +[template](../05_Bridge_API/02_BridgeAbstract.md#template) +and load a valid URL (not the base URL!). **Example (using github.com)** ```PHP logger = RssBridge::getLogger(); + global $container; + + // The default NullLogger is for when running the unit tests + $this->logger = $container['logger'] ?? new NullLogger(); } public function __set($name, $value) diff --git a/lib/RssBridge.php b/lib/RssBridge.php index 230488bf..5e90fb13 100644 --- a/lib/RssBridge.php +++ b/lib/RssBridge.php @@ -2,12 +2,12 @@ final class RssBridge { - private static Container $container; + private Container $container; public function __construct( Container $container ) { - self::$container = $container; + $this->container = $container; } public function main(Request $request): Response @@ -20,7 +20,7 @@ final class RssBridge return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'Invalid action']), 400); } - $handler = self::$container[$actionName]; + $handler = $this->container[$actionName]; $middlewares = [ new SecurityMiddleware(), @@ -36,20 +36,4 @@ final class RssBridge } return $action($request); } - - public static function getLogger(): Logger - { - // null logger is only for the tests not to fail - return self::$container['logger'] ?? new NullLogger(); - } - - public static function getCache(): CacheInterface - { - return self::$container['cache']; - } - - public static function getHttpClient(): HttpClient - { - return self::$container['http_client']; - } } diff --git a/lib/contents.php b/lib/contents.php index cc9542a9..56a3db20 100644 --- a/lib/contents.php +++ b/lib/contents.php @@ -14,8 +14,13 @@ function getContents( array $curlOptions = [], bool $returnFull = false ) { - $httpClient = RssBridge::getHttpClient(); - $cache = RssBridge::getCache(); + global $container; + + /** @var HttpClient $httpClient */ + $httpClient = $container['http_client']; + + /** @var CacheInterface $cache */ + $cache = $container['cache']; // TODO: consider url validation at this point @@ -212,7 +217,11 @@ function getSimpleHTMLDOMCached( $defaultBRText = DEFAULT_BR_TEXT, $defaultSpanText = DEFAULT_SPAN_TEXT ) { - $cache = RssBridge::getCache(); + global $container; + + /** @var CacheInterface $cache */ + $cache = $container['cache']; + $cacheKey = 'pages_' . $url; $content = $cache->get($cacheKey); if (!$content) {