mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2025-01-16 21:58:21 +01:00
feat: add etag support to getContents (#3893)
This commit is contained in:
parent
d5175aebcc
commit
191e5b0493
@ -163,7 +163,7 @@ PHP ini config:
|
||||
```ini
|
||||
; /etc/php/8.2/fpm/conf.d/30-rss-bridge.ini
|
||||
|
||||
max_execution_time = 20
|
||||
max_execution_time = 15
|
||||
memory_limit = 64M
|
||||
```
|
||||
|
||||
|
@ -48,7 +48,7 @@ enable_maintenance_mode = false
|
||||
|
||||
[http]
|
||||
; Operation timeout in seconds
|
||||
timeout = 30
|
||||
timeout = 15
|
||||
|
||||
; Operation retry count in case of curl error
|
||||
retries = 2
|
||||
|
@ -16,7 +16,7 @@ final class BridgeCard
|
||||
|
||||
$bridge = $bridgeFactory->create($bridgeClassName);
|
||||
|
||||
$isHttps = strpos($bridge->getURI(), 'https') === 0;
|
||||
$isHttps = str_starts_with($bridge->getURI(), 'https');
|
||||
|
||||
$uri = $bridge->getURI();
|
||||
$name = $bridge->getName();
|
||||
@ -113,8 +113,7 @@ EOD;
|
||||
}
|
||||
|
||||
if (!$isHttps) {
|
||||
$form .= '<div class="secure-warning">Warning :
|
||||
This bridge is not fetching its content through a secure connection</div>';
|
||||
$form .= '<div class="secure-warning">Warning: This bridge is not fetching its content through a secure connection</div>';
|
||||
}
|
||||
|
||||
return $form;
|
||||
|
@ -41,7 +41,7 @@ abstract class FeedExpander extends BridgeAbstract
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is overidden by bridges
|
||||
* This method is overridden by bridges
|
||||
*
|
||||
* @return array
|
||||
*/
|
||||
|
@ -7,9 +7,9 @@ declare(strict_types=1);
|
||||
*
|
||||
* Scrapes out rss 0.91, 1.0, 2.0 and atom 1.0.
|
||||
*
|
||||
* Produce arrays meant to be used inside rss-bridge.
|
||||
* Produces arrays meant to be used inside rss-bridge.
|
||||
*
|
||||
* The item structure is tweaked so that works with FeedItem
|
||||
* The item structure is tweaked so that it works with FeedItem
|
||||
*/
|
||||
final class FeedParser
|
||||
{
|
||||
|
@ -518,7 +518,10 @@ abstract class XPathAbstract extends BridgeAbstract
|
||||
if (strlen($value) === 0) {
|
||||
return '';
|
||||
}
|
||||
if (strpos($value, 'http://') === 0 || strpos($value, 'https://') === 0) {
|
||||
if (
|
||||
strpos($value, 'http://') === 0
|
||||
|| strpos($value, 'https://') === 0
|
||||
) {
|
||||
return $value;
|
||||
}
|
||||
|
||||
|
@ -24,6 +24,32 @@ function getContents(
|
||||
$headerValue = trim(implode(':', array_slice($parts, 1)));
|
||||
$httpHeadersNormalized[$headerName] = $headerValue;
|
||||
}
|
||||
|
||||
$requestBodyHash = null;
|
||||
if (isset($curlOptions[CURLOPT_POSTFIELDS])) {
|
||||
$requestBodyHash = md5(Json::encode($curlOptions[CURLOPT_POSTFIELDS], false));
|
||||
}
|
||||
$cacheKey = implode('_', ['server', $url, $requestBodyHash]);
|
||||
|
||||
/** @var Response $cachedResponse */
|
||||
$cachedResponse = $cache->get($cacheKey);
|
||||
if ($cachedResponse) {
|
||||
$lastModified = $cachedResponse->getHeader('last-modified');
|
||||
if ($lastModified) {
|
||||
try {
|
||||
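// Some servers send a Unix timestamp instead of an RFC 7231 date. Prefix it with @ so that DateTimeImmutable can parse it.
// NOTE(review): $config appears to be (re)assigned by the `$config = [` statement further down, which would discard the value set here - confirm.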
|
||||
$lastModified = new \DateTimeImmutable((is_numeric($lastModified) ? '@' : '') . $lastModified);
|
||||
$config['if_not_modified_since'] = $lastModified->getTimestamp();
|
||||
} catch (Exception $e) {
|
||||
// Failed to parse last-modified
|
||||
}
|
||||
}
|
||||
$etag = $cachedResponse->getHeader('etag');
|
||||
if ($etag) {
|
||||
$httpHeadersNormalized['if-none-match'] = $etag;
|
||||
}
|
||||
}
|
||||
|
||||
// Snagged from https://github.com/lwthiker/curl-impersonate/blob/main/firefox/curl_ff102
|
||||
$defaultHttpHeaders = [
|
||||
'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
|
||||
@ -35,6 +61,7 @@ function getContents(
|
||||
'Sec-Fetch-User' => '?1',
|
||||
'TE' => 'trailers',
|
||||
];
|
||||
|
||||
$config = [
|
||||
'useragent' => Configuration::getConfig('http', 'useragent'),
|
||||
'timeout' => Configuration::getConfig('http', 'timeout'),
|
||||
@ -53,28 +80,6 @@ function getContents(
|
||||
$config['proxy'] = Configuration::getConfig('proxy', 'url');
|
||||
}
|
||||
|
||||
$requestBodyHash = null;
|
||||
if (isset($curlOptions[CURLOPT_POSTFIELDS])) {
|
||||
$requestBodyHash = md5(Json::encode($curlOptions[CURLOPT_POSTFIELDS], false));
|
||||
}
|
||||
$cacheKey = implode('_', ['server', $url, $requestBodyHash]);
|
||||
|
||||
/** @var Response $cachedResponse */
|
||||
$cachedResponse = $cache->get($cacheKey);
|
||||
if ($cachedResponse) {
|
||||
$cachedLastModified = $cachedResponse->getHeader('last-modified');
|
||||
if ($cachedLastModified) {
|
||||
try {
|
||||
// Some servers send Unix timestamp instead of RFC7231 date. Prepend it with @ to allow parsing as DateTime
|
||||
$cachedLastModified = new \DateTimeImmutable((is_numeric($cachedLastModified) ? '@' : '') . $cachedLastModified);
|
||||
$config['if_not_modified_since'] = $cachedLastModified->getTimestamp();
|
||||
} catch (Exception $dateTimeParseFailue) {
|
||||
// Ignore invalid 'Last-Modified' HTTP header value
|
||||
}
|
||||
}
|
||||
// todo: We should also check for Etag
|
||||
}
|
||||
|
||||
$response = $httpClient->request($url, $config);
|
||||
|
||||
switch ($response->getCode()) {
|
||||
|
@ -258,6 +258,10 @@ final class Response
|
||||
}
|
||||
|
||||
/**
|
||||
* HTTP response may have multiple headers with the same name.
|
||||
*
|
||||
* By default, this method returns only the last header.
|
||||
*
|
||||
* @return string[]|string|null
|
||||
*/
|
||||
public function getHeader(string $name, bool $all = false)
|
||||
|
Loading…
x
Reference in New Issue
Block a user