1
0
mirror of https://github.com/RSS-Bridge/rss-bridge.git synced 2025-01-16 21:58:21 +01:00
This commit is contained in:
Dag 2024-08-30 04:21:51 +02:00 committed by GitHub
parent bb2f471a03
commit 6a24e53d6c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 69 additions and 66 deletions

View File

@ -68,12 +68,13 @@ class AO3Bridge extends BridgeAbstract
*/
private function collectList($url)
{
$httpClient = RssBridge::getHttpClient();
$version = 'v0.0.1';
$agent = ['useragent' => "rss-bridge $version (https://github.com/RSS-Bridge/rss-bridge)"];
$headers = [
"useragent: rss-bridge $version (https://github.com/RSS-Bridge/rss-bridge)"
];
$response = getContents($url, $headers);
$response = $httpClient->request($url, $agent);
$html = \str_get_html($response->getBody());
$html = \str_get_html($response);
$html = defaultLinkTo($html, self::URI);
// Get list title. Will include page range + count in some cases
@ -128,14 +129,15 @@ class AO3Bridge extends BridgeAbstract
case ('last'):
// only way to get this is using the navigate page unfortunately
$url .= '/navigate';
$response = $httpClient->request($url, $agent);
$html = \str_get_html($response->getBody());
$response = getContents($url, $headers);
$html = \str_get_html($response);
$html = defaultLinkTo($html, self::URI);
$url = $html->find('ol.index.group > li > a', -1)->href;
break;
}
$response = $httpClient->request($url, $agent);
$html = \str_get_html($response->getBody());
$response = getContents($url, $headers);
$html = \str_get_html($response);
$html = defaultLinkTo($html, self::URI);
// remove duplicate fic summary
if ($ficsum = $html->find('#workskin > .preface > .summary', 0)) {
@ -159,16 +161,18 @@ class AO3Bridge extends BridgeAbstract
*/
private function collectWork($url)
{
$httpClient = RssBridge::getHttpClient();
$version = 'v0.0.1';
$agent = ['useragent' => "rss-bridge $version (https://github.com/RSS-Bridge/rss-bridge)"];
$headers = [
"useragent: rss-bridge $version (https://github.com/RSS-Bridge/rss-bridge)"
];
$response = getContents($url . '/navigate', $headers);
$response = $httpClient->request($url . '/navigate', $agent);
$html = \str_get_html($response->getBody());
$html = \str_get_html($response);
$html = defaultLinkTo($html, self::URI);
$response = $httpClient->request($url . '?view_full_work=true', $agent);
$workhtml = \str_get_html($response->getBody());
$response = getContents($url . '?view_full_work=true', $headers);
$workhtml = \str_get_html($response);
$workhtml = defaultLinkTo($workhtml, self::URI);
$this->title = $html->find('h2 a', 0)->plaintext;

View File

@ -54,7 +54,7 @@ class BMDSystemhausBlogBridge extends BridgeAbstract
public function collectData()
{
// get website content
$html = getSimpleHTMLDOM($this->getURI()) or returnServerError('No contents received!');
$html = getSimpleHTMLDOM($this->getURI());
// Convert relative links in HTML into absolute links
$html = defaultLinkTo($html, self::URI);
@ -207,7 +207,8 @@ class BMDSystemhausBlogBridge extends BridgeAbstract
//-----------------------------------------------------
/**
 * Return the URI for the currently selected country.
 *
 * Falls back to the parent implementation when the country lookup
 * yields an empty value.
 */
public function getURI()
{
    // The 'country' input can be absent (null); normalise it to an empty
    // string so the lookup always receives a string, and only look it up once.
    $country = $this->getInput('country') ?? '';
    $lURI = $this->getURIbyCountry($country);

    // Loose comparison is kept on purpose: it is what the original check used.
    return $lURI != '' ? $lURI : parent::getURI();
}

View File

@ -196,23 +196,21 @@ EOD;
// e.g. 01:53:27
/**
 * Format a duration given in seconds as a zero-padded HH:MM:SS string.
 *
 * @param int|float|string $seconds Duration in seconds (numeric).
 * @return string Formatted time, e.g. 01:53:27
 */
private function formatTimestampTime($seconds)
{
    // The modulo operator only works on integers. Cast explicitly before
    // applying it: implicit float-to-int conversion is deprecated since PHP 8.1.
    $hours = (int) floor($seconds / 3600);
    $minutes = ((int) ($seconds / 60)) % 60;
    $remainder = ((int) $seconds) % 60;

    return sprintf('%02d:%02d:%02d', $hours, $minutes, $remainder);
}
// e.g. 01h53m27s
/**
 * Format a duration given in seconds as a zero-padded "HHhMMmSSs" string.
 *
 * @param int|float|string $seconds Duration in seconds (numeric).
 * @return string Formatted time, e.g. 01h53m27s
 */
private function formatQueryTime($seconds)
{
    // The modulo operator only works on integers. Cast explicitly before
    // applying it: implicit float-to-int conversion is deprecated since PHP 8.1.
    $hours = (int) floor($seconds / 3600);
    $minutes = ((int) ($seconds / 60)) % 60;
    $remainder = ((int) $seconds) % 60;

    return sprintf('%02dh%02dm%02ds', $hours, $minutes, $remainder);
}
/**

View File

@ -1,16 +1,20 @@
## General recommendations
* Use [HTTPS](https://en.wikipedia.org/wiki/HTTPS) (`https://...`) over [HTTP](https://en.wikipedia.org/wiki/HTTP) (`http://...`) whenever possible
## Test a site before building a bridge
Some sites make use of anti-bot mechanisms (e.g.: by using JavaScript) in which case they work fine in regular browsers, but not in the PHP environment. To check if a site works with RSS-Bridge, create a new bridge using the [template](../05_Bridge_API/02_BridgeAbstract.md#template) and load a valid URL (not the base URL!).
Some sites make use of anti-bot mechanisms (e.g.: by using JavaScript) in which case they work fine in regular browsers,
but not in the PHP environment.
To check if a site works with RSS-Bridge, create a new bridge using the
[template](../05_Bridge_API/02_BridgeAbstract.md#template)
and load a valid URL (not the base URL!).
**Example (using github.com)**
```PHP
<?php
class TestBridge extends BridgeAbstract {
class TestBridge extends BridgeAbstract
{
const NAME = 'Unnamed bridge';
const URI = '';
const DESCRIPTION = 'No description provided';
@ -18,9 +22,9 @@ class TestBridge extends BridgeAbstract {
const PARAMETERS = [];
const CACHE_TIMEOUT = 3600;
public function collectData(){
$html = getSimpleHTMLDOM('https://github.com/rss-bridge/rss-bridge')
or returnServerError('No contents received!');
public function collectData()
{
$html = getSimpleHTMLDOM('https://github.com/rss-bridge/rss-bridge');
}
}
```

View File

@ -25,7 +25,10 @@ class FeedItem
/**
 * Set up the item's logger from the global container.
 */
private function __construct()
{
    global $container;

    // The default NullLogger is for when running the unit tests
    $this->logger = $container['logger'] ?? new NullLogger();
}
public function __set($name, $value)

View File

@ -2,12 +2,12 @@
final class RssBridge
{
private static Container $container;
private Container $container;
/**
 * Store the given container.
 *
 * NOTE(review): the container is written to both the static property and the
 * instance property; the static getters in this class read the static one.
 * Confirm which of the two is meant to remain.
 */
public function __construct(
Container $container
) {
self::$container = $container;
$this->container = $container;
}
public function main(Request $request): Response
@ -20,7 +20,7 @@ final class RssBridge
return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'Invalid action']), 400);
}
$handler = self::$container[$actionName];
$handler = $this->container[$actionName];
$middlewares = [
new SecurityMiddleware(),
@ -36,20 +36,4 @@ final class RssBridge
}
return $action($request);
}
/**
 * Return the container's 'logger' entry, or a new NullLogger when that
 * entry is not set.
 */
public static function getLogger(): Logger
{
// null logger is only for the tests not to fail
return self::$container['logger'] ?? new NullLogger();
}
/**
 * Return the container's 'cache' entry.
 *
 * Unlike getLogger() there is no fallback when the entry is missing.
 */
public static function getCache(): CacheInterface
{
return self::$container['cache'];
}
/**
 * Return the container's 'http_client' entry.
 *
 * Unlike getLogger() there is no fallback when the entry is missing.
 */
public static function getHttpClient(): HttpClient
{
return self::$container['http_client'];
}
}

View File

@ -14,8 +14,13 @@ function getContents(
array $curlOptions = [],
bool $returnFull = false
) {
$httpClient = RssBridge::getHttpClient();
$cache = RssBridge::getCache();
global $container;
/** @var HttpClient $httpClient */
$httpClient = $container['http_client'];
/** @var CacheInterface $cache */
$cache = $container['cache'];
// TODO: consider url validation at this point
@ -212,7 +217,11 @@ function getSimpleHTMLDOMCached(
$defaultBRText = DEFAULT_BR_TEXT,
$defaultSpanText = DEFAULT_SPAN_TEXT
) {
$cache = RssBridge::getCache();
global $container;
/** @var CacheInterface $cache */
$cache = $container['cache'];
$cacheKey = 'pages_' . $url;
$content = $cache->get($cacheKey);
if (!$content) {