1
0
mirror of https://github.com/RSS-Bridge/rss-bridge.git synced 2025-07-30 21:30:14 +02:00
This commit is contained in:
Dag
2023-09-20 02:45:48 +02:00
committed by GitHub
parent cf7e3eea56
commit e6aef73a02
15 changed files with 134 additions and 384 deletions

View File

@@ -1,76 +1,15 @@
<?php
/**
* This file is part of RSS-Bridge, a PHP project capable of generating RSS and
* Atom feeds for websites that don't have one.
*
* For the full license information, please view the UNLICENSE file distributed
* with this source code.
*
* @package Core
* @license http://unlicense.org/ UNLICENSE
* @link https://github.com/rss-bridge/rss-bridge
*/
abstract class BridgeAbstract implements BridgeInterface
abstract class BridgeAbstract
{
/**
* Name of the bridge
*
* Use {@see BridgeAbstract::getName()} to read this parameter
*/
const NAME = 'Unnamed bridge';
/**
* URI to the site the bridge is intended to be used for.
*
* Use {@see BridgeAbstract::getURI()} to read this parameter
*/
const URI = '';
/**
* Donation URI to the site the bridge is intended to be used for.
*
* Use {@see BridgeAbstract::getDonationURI()} to read this parameter
*/
const DONATION_URI = '';
/**
* A brief description of what the bridge can do
*
* Use {@see BridgeAbstract::getDescription()} to read this parameter
*/
const DESCRIPTION = 'No description provided';
/**
* The name of the maintainer. Multiple maintainers can be separated by comma
*
* Use {@see BridgeAbstract::getMaintainer()} to read this parameter
*/
const MAINTAINER = 'No maintainer';
/**
* The default cache timeout for the bridge
*
* Use {@see BridgeAbstract::getCacheTimeout()} to read this parameter
*/
const CACHE_TIMEOUT = 3600;
/**
* Configuration for the bridge
*/
const CONFIGURATION = [];
/**
* Parameters for the bridge
*
* Use {@see BridgeAbstract::getParameters()} to read this parameter
*/
const PARAMETERS = [];
/**
* Test cases for detectParameters for the bridge
*/
const TEST_DETECT_PARAMETERS = [];
/**
@@ -83,49 +22,67 @@ abstract class BridgeAbstract implements BridgeInterface
'title' => 'Maximum number of items to return',
];
/**
* Holds the list of items collected by the bridge
*
* Items must be collected by {@see BridgeInterface::collectData()}
*
* Use {@see BridgeAbstract::getItems()} to access items.
*
* @var array
*/
protected array $items = [];
/**
* Holds the list of input parameters used by the bridge
*
* Do not access this parameter directly!
* Use {@see BridgeAbstract::setInputs()} and {@see BridgeAbstract::getInput()} instead!
*
* @var array
*/
protected array $inputs = [];
/**
* Holds the name of the queried context
*
* @var string
*/
protected $queriedContext = '';
/**
* Holds the list of bridge-specific configurations from config.ini.php, used by the bridge.
*/
protected string $queriedContext = '';
private array $configuration = [];
/**
* Creates the bridge instance.
*
* The constructor body is empty: no state is initialized here beyond the
* property defaults declared on the class.
*/
public function __construct()
{
}
/**
* Collects the data for this bridge.
*
* Declared abstract, so every concrete bridge must supply its own
* implementation.
*/
abstract public function collectData();
/**
* Returns the items currently held by the bridge.
*
* @return array The contents of the protected $items property
*/
public function getItems()
{
return $this->items;
}
/**
* Returns a bridge-specific configuration value.
*
* @param string $name The option name
* @return mixed|null The stored value, or null when the option is not set
*/
public function getOption(string $name)
{
return $this->configuration[$name] ?? null;
}
/**
* Returns the bridge description.
*
* Reads the DESCRIPTION constant through late static binding, so a value
* redefined by the concrete bridge class takes precedence.
*/
public function getDescription()
{
return static::DESCRIPTION;
}
/**
* Returns the bridge maintainer.
*
* Reads the MAINTAINER constant through late static binding, so a value
* redefined by the concrete bridge class takes precedence.
*/
public function getMaintainer(): string
{
return static::MAINTAINER;
}
/**
* Returns the bridge name.
*
* Reads the NAME constant through late static binding, so a value
* redefined by the concrete bridge class takes precedence.
*/
public function getName()
{
return static::NAME;
}
/**
 * Returns the URL of the bridge icon.
 *
 * The icon is assumed to live at "/favicon.ico" directly below the bridge URI.
 *
 * @return string The URI constant followed by "/favicon.ico"
 */
public function getIcon()
{
    // Strip a trailing slash first so a URI such as "https://example.com/"
    // does not produce "https://example.com//favicon.ico".
    return rtrim(static::URI, '/') . '/favicon.ico';
}
/**
* Returns the bridge parameters.
*
* Reads the PARAMETERS constant through late static binding, so a value
* redefined by the concrete bridge class takes precedence.
*/
public function getParameters(): array
{
return static::PARAMETERS;
}
/**
* Returns the bridge URI.
*
* Reads the URI constant through late static binding, so a value
* redefined by the concrete bridge class takes precedence.
*/
public function getURI()
{
return static::URI;
}
/**
* Returns the bridge donation URI.
*
* Reads the DONATION_URI constant through late static binding, so a value
* redefined by the concrete bridge class takes precedence.
*/
public function getDonationURI(): string
{
return static::DONATION_URI;
}
/**
* Returns the cache timeout of the bridge.
*
* Reads the CACHE_TIMEOUT constant through late static binding, so a value
* redefined by the concrete bridge class takes precedence.
* NOTE(review): the unit is presumably seconds - confirm against the cache layer.
*/
public function getCacheTimeout()
{
return static::CACHE_TIMEOUT;
}
/**
* Sets the input values for a given context.
*
@@ -299,10 +256,7 @@ abstract class BridgeAbstract implements BridgeInterface
*/
protected function getInput($input)
{
    // Look up inputs[<queried context>][<input>]['value']. The null-coalescing
    // operator yields null when any level of that path is missing or the
    // stored value itself is null, which is what the former isset() guard did.
    return $this->inputs[$this->queriedContext][$input]['value'] ?? null;
}
/**
@@ -340,63 +294,6 @@ abstract class BridgeAbstract implements BridgeInterface
}
}
/**
* Get bridge configuration value
*/
public function getOption($name)
{
return $this->configuration[$name] ?? null;
}
/** {@inheritdoc} */
public function getDescription()
{
return static::DESCRIPTION;
}
/** {@inheritdoc} */
public function getMaintainer()
{
return static::MAINTAINER;
}
/** {@inheritdoc} */
public function getName()
{
return static::NAME;
}
/** {@inheritdoc} */
public function getIcon()
{
return static::URI . '/favicon.ico';
}
/** {@inheritdoc} */
public function getParameters()
{
return static::PARAMETERS;
}
/** {@inheritdoc} */
public function getURI()
{
return static::URI;
}
/** {@inheritdoc} */
public function getDonationURI()
{
return static::DONATION_URI;
}
/** {@inheritdoc} */
public function getCacheTimeout()
{
return static::CACHE_TIMEOUT;
}
/** {@inheritdoc} */
public function detectParameters($url)
{
$regex = '/^(https?:\/\/)?(www\.)?(.+?)(\/)?$/';
@@ -411,11 +308,6 @@ abstract class BridgeAbstract implements BridgeInterface
return null;
}
/**
* Loads a cached value for the specified key
*
* @return mixed Cached value or null if the key doesn't exist or has expired
*/
protected function loadCacheValue(string $key)
{
$cache = RssBridge::getCache();
@@ -423,11 +315,6 @@ abstract class BridgeAbstract implements BridgeInterface
return $cache->get($cacheKey);
}
/**
* Stores a value to cache with the specified key
*
* @param mixed $value Value to cache
*/
protected function saveCacheValue(string $key, $value, $ttl = 86400)
{
$cache = RssBridge::getCache();

View File

@@ -25,7 +25,7 @@ final class BridgeCard
/**
* Gets a single bridge card
*
* @param class-string<BridgeInterface> $bridgeClassName The bridge name
* @param class-string<BridgeAbstract> $bridgeClassName The bridge name
* @param array $formats A list of formats
* @param bool $isActive Indicates if the bridge is active or not
* @return string The bridge card
@@ -116,7 +116,7 @@ CARD;
/**
* Get the form header for a bridge card
*
* @param class-string<BridgeInterface> $bridgeClassName The bridge name
* @param class-string<BridgeAbstract> $bridgeClassName The bridge name
* @param bool $isHttps If disabled, adds a warning to the form
* @return string The form header
*/
@@ -143,7 +143,7 @@ This bridge is not fetching its content through a secure connection</div>';
/**
* Get the form body for a bridge
*
* @param class-string<BridgeInterface> $bridgeClassName The bridge name
* @param class-string<BridgeAbstract> $bridgeClassName The bridge name
* @param array $formats A list of supported formats
* @param bool $isActive Indicates if a bridge is enabled or not
* @param bool $isHttps Indicates if a bridge uses HTTPS or not

View File

@@ -34,7 +34,7 @@ final class BridgeFactory
}
}
public function create(string $name): BridgeAbstract
{
    // $name is used directly as the class name to instantiate; the return
    // type makes PHP reject anything that is not a BridgeAbstract.
    return new $name();
}

View File

@@ -1,145 +0,0 @@
<?php
/**
* This file is part of RSS-Bridge, a PHP project capable of generating RSS and
* Atom feeds for websites that don't have one.
*
* For the full license information, please view the UNLICENSE file distributed
* with this source code.
*
* @package Core
* @license http://unlicense.org/ UNLICENSE
* @link https://github.com/rss-bridge/rss-bridge
*/
/**
* The bridge interface
*
* A bridge is a class that is responsible for collecting and transforming data
* from one hosting provider into an internal representation of feed data, that
* can later be transformed into different feed formats (see {@see FormatInterface}).
*
* For this purpose, all bridges need to perform three common operations:
*
* 1. Collect data from a remote site.
* 2. Extract the required contents.
* 3. Add the contents to the internal data structure.
*
* Bridges can optionally specify parameters to customize bridge behavior based
* on user input. For example, a user could specify how many items to return in
* the feed and where to get them.
*
* In order to present a bridge on the home page, and for the purpose of bridge
* specific behaviour, additional information must be provided by the bridge:
*
* * **Name**
* The name of the bridge that can be displayed to users.
*
* * **Description**
* A brief description for the bridge that can be displayed to users.
*
* * **URI**
* A link to the hosting provider.
*
* * **Maintainer**
* The GitHub username of the bridge maintainer
*
* * **Parameters**
* A list of parameters for customization
*
* * **Icon**
* A link to the favicon of the hosting provider
*
* * **Cache timeout**
* The default cache timeout for the bridge.
*/
interface BridgeInterface
{
/**
* Collects data from the site
*
* @return void
*/
public function collectData();
/**
* Returns the value for the selected configuration
*
* @param string $name The option name
* @return mixed|null The option value or null if the option is not defined
*/
public function getOption($name);
/**
* Returns the description
*
* @return string Description
*/
public function getDescription();
/**
* Returns an array of collected items
*
* @return array Associative array of items
*/
public function getItems();
/**
* Returns the bridge maintainer
*
* @return string Bridge maintainer
*/
public function getMaintainer();
/**
* Returns the bridge name
*
* @return string Bridge name
*/
public function getName();
/**
* Returns the bridge icon
*
* @return string Bridge icon
*/
public function getIcon();
/**
* Returns the bridge parameters
*
* @return array Bridge parameters
*/
public function getParameters();
/**
* Returns the bridge URI
*
* @return string Bridge URI
*/
public function getURI();
/**
* Returns the bridge Donation URI
*
* @return string Bridge Donation URI
*/
public function getDonationURI();
/**
* Returns the cache timeout
*
* @return int Cache timeout
*/
public function getCacheTimeout();
/**
* Returns parameters from given URL or null if URL is not applicable
*
* @param string $url URL to extract parameters from
* @return array|null List of bridge parameters or null if detection failed.
*/
public function detectParameters($url);
/**
* Returns the short name of the bridge
*
* NOTE(review): the exact format is defined by the implementing class - confirm there.
*
* @return string Bridge short name
*/
public function getShortName(): string;
}

View File

@@ -74,7 +74,7 @@ abstract class FeedExpander extends BridgeAbstract
/**
* Collects data from an existing feed.
*
* Children should call this function in {@see BridgeInterface::collectData()}
* Children should call this function in {@see BridgeAbstract::collectData()}
* to extract a feed.
*
* @param string $url URL to the feed.

View File

@@ -15,6 +15,9 @@ final class RssBridge
}
Configuration::loadConfiguration($customConfig, getenv());
// Consider: ini_set('error_reporting', E_ALL & ~E_DEPRECATED);
date_default_timezone_set(Configuration::getConfig('system', 'timezone'));
set_exception_handler(function (\Throwable $e) {
Logger::error('Uncaught Exception', ['e' => $e]);
http_response_code(500);
@@ -57,9 +60,6 @@ final class RssBridge
}
});
// Consider: ini_set('error_reporting', E_ALL & ~E_DEPRECATED);
date_default_timezone_set(Configuration::getConfig('system', 'timezone'));
self::$httpClient = new CurlHttpClient();
$cacheFactory = new CacheFactory();
@@ -68,11 +68,6 @@ final class RssBridge
} else {
self::$cache = $cacheFactory->create();
}
if (Configuration::getConfig('authentication', 'enable')) {
$authenticationMiddleware = new AuthenticationMiddleware();
$authenticationMiddleware();
}
}
public function main(array $argv = []): void
@@ -81,6 +76,10 @@ final class RssBridge
parse_str(implode('&', array_slice($argv, 1)), $cliArgs);
$request = $cliArgs;
} else {
if (Configuration::getConfig('authentication', 'enable')) {
$authenticationMiddleware = new AuthenticationMiddleware();
$authenticationMiddleware();
}
$request = array_merge($_GET, $_POST);
}
@@ -124,10 +123,4 @@ final class RssBridge
{
return self::$cache ?? new NullCache();
}
public function clearCache()
{
    // Ask the active cache backend (as returned by getCache()) to clear itself.
    self::getCache()->clear();
}
}

View File

@@ -16,6 +16,13 @@ function getContents(
) {
$httpClient = RssBridge::getHttpClient();
$httpHeadersNormalized = [];
foreach ($httpHeaders as $httpHeader) {
$parts = explode(':', $httpHeader);
$headerName = trim($parts[0]);
$headerValue = trim(implode(':', array_slice($parts, 1)));
$httpHeadersNormalized[$headerName] = $headerValue;
}
// Snagged from https://github.com/lwthiker/curl-impersonate/blob/main/firefox/curl_ff102
$defaultHttpHeaders = [
'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
@@ -27,13 +34,6 @@ function getContents(
'Sec-Fetch-User' => '?1',
'TE' => 'trailers',
];
$httpHeadersNormalized = [];
foreach ($httpHeaders as $httpHeader) {
$parts = explode(':', $httpHeader);
$headerName = trim($parts[0]);
$headerValue = trim(implode(':', array_slice($parts, 1)));
$httpHeadersNormalized[$headerName] = $headerValue;
}
$config = [
'useragent' => Configuration::getConfig('http', 'useragent'),
'timeout' => Configuration::getConfig('http', 'timeout'),
@@ -43,7 +43,7 @@ function getContents(
$maxFileSize = Configuration::getConfig('http', 'max_filesize');
if ($maxFileSize) {
// Multiply with 2^20 (1M) to the value in bytes
// Convert from MB to B by multiplying with 2^20 (1M)
$config['max_filesize'] = $maxFileSize * 2 ** 20;
}
@@ -57,7 +57,6 @@ function getContents(
/** @var Response $cachedResponse */
$cachedResponse = $cache->get($cacheKey);
if ($cachedResponse) {
// considering popping
$cachedLastModified = $cachedResponse->getHeader('last-modified');
if ($cachedLastModified) {
$cachedLastModified = new \DateTimeImmutable($cachedLastModified);
@@ -101,21 +100,13 @@ function getContents(
Debug::isEnabled() ? mb_substr($response->getBody(), 0, 500) : '',
);
// The following code must be extracted if it grows too much
$cloudflareTitles = [
'<title>Just a moment...',
'<title>Please Wait...',
'<title>Attention Required!',
'<title>Security | Glassdoor',
];
foreach ($cloudflareTitles as $cloudflareTitle) {
if (str_contains($response->getBody(), $cloudflareTitle)) {
throw new CloudFlareException($exceptionMessage, $response->getCode());
}
if (CloudFlareException::isCloudFlareResponse($response)) {
throw new CloudFlareException($exceptionMessage, $response->getCode());
}
throw new HttpException(trim($exceptionMessage), $response->getCode());
}
if ($returnFull === true) {
// todo: return the actual response object
return [
'code' => $response->getCode(),
'headers' => $response->getHeaders(),

View File

@@ -6,6 +6,21 @@ class HttpException extends \Exception
final class CloudFlareException extends HttpException
{
    /**
     * Tells whether a response body looks like a challenge/interstitial page.
     *
     * Detection is a plain substring search for a small set of known page
     * title openings; the first match wins.
     *
     * @param Response $response The HTTP response to inspect
     * @return bool True when the body contains one of the known titles
     */
    public static function isCloudFlareResponse(Response $response): bool
    {
        $knownTitles = [
            '<title>Just a moment...',
            '<title>Please Wait...',
            '<title>Attention Required!',
            '<title>Security | Glassdoor',
        ];

        // Fetch the body once; it does not change between candidate titles.
        $body = $response->getBody();

        foreach ($knownTitles as $title) {
            if (str_contains($body, $title)) {
                return true;
            }
        }

        return false;
    }
}
interface HttpClient
@@ -119,7 +134,7 @@ final class CurlHttpClient implements HttpClient
}
}
$statusCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
$statusCode = curl_getinfo($ch, CURLINFO_RESPONSE_CODE);
curl_close($ch);
return new Response($data, $statusCode, $responseHeaders);
}