1
0
mirror of https://github.com/RSS-Bridge/rss-bridge.git synced 2025-08-25 01:16:07 +02:00

Compare commits

..

5 Commits
fix2 ... fix1

Author SHA1 Message Date
Dag
a535121ab1 Merge remote-tracking branch 'origin/master' into fix1 2022-04-08 22:37:16 +02:00
dag
cce11964a4 feat: add a timeout option for http client (#2600) 2022-04-08 21:22:13 +02:00
Corentin Garcia
8c18c02c65 [GatesNotesBridge] Add feedaxpander bridge for Bill Gate's blog (fix issue #2386) (#2611) 2022-04-08 21:21:13 +02:00
Antoine Turmel
51d27300be [FeedMergeBridge] Add new bridge (#1385)
* [FeedMergeBridge] Add new bridge

Here is a bridge that merges two or more feeds into one.

Co-authored-by: Bocki <henning@bocklage.com>
Co-authored-by: Dag <me@dvikan.no>
2022-04-08 21:13:05 +02:00
Dag
b55c5090e6 fix: require curl extension 2022-04-08 19:54:08 +02:00
6 changed files with 190 additions and 97 deletions

View File

@@ -0,0 +1,54 @@
<?php
/**
 * Bridge that combines several feeds into one.
 *
 * Up to five feed URLs can be supplied; every non-empty one is handed to
 * collectExpandableDatas() together with a per-feed item limit.
 */
class FeedMergeBridge extends FeedExpander {

	const MAINTAINER = 'dvikan';
	const NAME = 'FeedMerge';
	const URI = 'https://github.com/RSS-Bridge/rss-bridge';
	const DESCRIPTION = <<<'TEXT'
This bridge merges two or more feeds into a single feed. Max 10 items are fetched from each feed.
TEXT;

	// Only the first feed url is mandatory; feed_name is an optional title.
	const PARAMETERS = [
		[
			'feed_name' => [
				'name' => 'Feed name',
				'type' => 'text',
				'exampleValue' => 'rss-bridge/FeedMerger',
			],
			'feed_1' => [
				'name' => 'Feed url',
				'type' => 'text',
				'required' => true,
				'exampleValue' => 'https://lorem-rss.herokuapp.com/feed?unit=day',
			],
			'feed_2' => [
				'name' => 'Feed url',
				'type' => 'text',
			],
			'feed_3' => [
				'name' => 'Feed url',
				'type' => 'text',
			],
			'feed_4' => [
				'name' => 'Feed url',
				'type' => 'text',
			],
			'feed_5' => [
				'name' => 'Feed url',
				'type' => 'text',
			],
		]
	];

	/**
	 * Reads the feed_1 .. feed_5 inputs and collects each non-empty feed,
	 * passing a limit of 10 to collectExpandableDatas().
	 */
	public function collectData() {
		$maxItemsPerFeed = 10;

		// Gather all inputs first, in declaration order
		$candidateUrls = [];
		foreach (range(1, 5) as $position) {
			$candidateUrls[] = $this->getInput('feed_' . $position);
		}

		// array_filter() without a callback drops every falsy (unset/empty) entry
		foreach (array_filter($candidateUrls) as $feedUrl) {
			$this->collectExpandableDatas($feedUrl, $maxItemsPerFeed);
		}
	}

	/**
	 * Icon shown for the merged feed.
	 */
	public function getIcon() {
		return 'https://cdn.jsdelivr.net/npm/famfamfam-silk@1.0.0/dist/png/folder_feed.png';
	}

	/**
	 * Name of the merged feed: the 'feed_name' input when it is non-empty,
	 * otherwise a fixed default.
	 */
	public function getName() {
		$customName = $this->getInput('feed_name');
		if ($customName) {
			return $customName;
		}

		return 'rss-bridge/FeedMerger';
	}
}

View File

@@ -0,0 +1,54 @@
<?php
/**
 * Bridge for the Gates Notes blog.
 *
 * Expands the site's RSS feed (URI + '/rss') and replaces the content of each
 * item with the description, hero image and body taken from the article page.
 */
class GatesNotesBridge extends FeedExpander {

	const MAINTAINER = 'corenting';
	const NAME = 'Gates Notes';
	const URI = 'https://www.gatesnotes.com';
	const DESCRIPTION = 'Returns the newest articles.';
	const CACHE_TIMEOUT = 21600; // 6h

	/**
	 * Builds a feed item and enriches it with the full article.
	 *
	 * Starts from the item produced by the parent parser, then loads the
	 * article page. If the page cannot be requested a notice is appended to
	 * the item content. If the page does not contain the expected article
	 * body, the item is returned as parsed from the feed instead of failing.
	 *
	 * @param object $item Feed entry, passed through to parent::parseItem()
	 * @return array The feed item
	 */
	protected function parseItem($item){
		$item = parent::parseItem($item);

		$article_html = getSimpleHTMLDOMCached($item['uri']);
		if(!$article_html) {
			$item['content'] .= '<p><em>Could not request ' . $this->getName() . ': ' . $item['uri'] . '</em></p>';
			return $item;
		}
		$article_html = defaultLinkTo($article_html, $this->getURI());

		// Every lookup below is checked before use: calling a method on a
		// missing node would abort the whole feed with a fatal error.
		$article_body = $article_html->find('div.TGN_Article_ReadTimeSection', 0);
		if(!$article_body) {
			// Unknown page layout, keep the content that came with the feed
			return $item;
		}

		$top_description = '';
		$description_node = $article_html->find('div.article_top_description', 0);
		if($description_node) {
			$top_description = '<p>' . $description_node->innertext . '</p>';
		}

		$hero_image = '';
		$image_node = $article_html->find('img.article_top_DMT_Image', 0);
		if($image_node) {
			$image_url = $image_node->getAttribute('data-src');
			if($image_url) {
				// Quote and escape the attribute so urls containing spaces or
				// quotes still produce valid markup. double_encode is off to
				// leave entities already present in the url untouched.
				$hero_image = '<img src="' . htmlspecialchars($image_url, ENT_QUOTES, 'UTF-8', false) . '">';
			}
		}

		// Convert iframe of Youtube videos to link
		foreach($article_body->find('iframe') as $found) {
			$iframeUrl = $found->getAttribute('src');
			if ($iframeUrl) {
				$text = 'Embedded Youtube video, click here to watch on Youtube.com';
				$found->outertext = '<p><a href="' . $iframeUrl . '">' . $text . '</a></p>';
			}
		}

		// Remove <link> CSS resources
		foreach($article_body->find('link') as $found) {
			$linkedResourceUrl = (string)$found->getAttribute('href');
			// substr() instead of str_ends_with() so the check also works on
			// PHP versions older than 8.0
			if (substr($linkedResourceUrl, -4) === '.css') {
				$found->outertext = '';
			}
		}

		$article_body = sanitize($article_body->innertext);
		$item['content'] = $top_description . $hero_image . $article_body;

		return $item;
	}

	/**
	 * Collects the items of the site's RSS feed.
	 */
	public function collectData(){
		$feed = static::URI . '/rss';
		$this->collectExpandableDatas($feed);
	}
}

View File

@@ -13,6 +13,7 @@
timezone = "UTC"
[http]
timeout = 60
useragent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36"
[cache]

View File

@@ -17,6 +17,7 @@ __Notice__: If a parameter is not specified in your `config.ini.php` RSS-Bridge
The configuration file is split into sections:
* [system](#system)
* [http client](#http-client)
* [cache](#cache)
* [proxy](#proxy)
* [authentication](#authentication)
@@ -25,6 +26,8 @@ The configuration file is split into sections:
_System_: This section specifies system specific parameters
_Http client_: This section has http client options
_Cache_: This section is all about the caching behavior of RSS-Bridge
_Proxy_: This section can be used to specify a proxy server for RSS-Bridge to utilize for fetching contents
@@ -98,6 +101,21 @@ Allow users to disable proxy usage for specific requests.
`false` = disabled (default)
## Http client
This section provides the following parameters:
- timeout
- useragent
### timeout
Default network timeout in seconds.
### useragent
Default user agent.
## Authentication
This section provides the following parameters:

View File

@@ -95,10 +95,6 @@ abstract class FeedExpander extends BridgeAbstract {
or returnServerError('Could not request ' . $url);
$rssContent = simplexml_load_string(trim($content));
if ($rssContent === false) {
throw new \Exception('Unable to parse string as xml');
}
Debug::log('Detecting feed format/version');
switch(true) {
case isset($rssContent->item[0]):

View File

@@ -128,104 +128,74 @@ function getContents($url, $header = array(), $opts = array(), $returnHeader = f
'content' => '',
);
// Use file_get_contents() if curl module is not installed
if(! function_exists('curl_version')) {
$ch = curl_init($url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
$httpHeaders = '';
if(is_array($header) && count($header) !== 0) {
foreach ($header as $headerL) {
$httpHeaders .= $headerL . "\r\n";
}
Debug::log('Setting headers: ' . json_encode($header));
curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
$ctx = stream_context_create(array(
'http' => array(
'header' => $httpHeaders
)
));
$data = @file_get_contents($url, 0, $ctx);
if($data === false) {
$errorCode = 500;
} else {
$errorCode = 200;
$retVal['header'] = implode("\r\n", $http_response_header);
}
$curlError = '';
$curlErrno = '';
$headerSize = 0;
$finalHeader = array();
} else {
$ch = curl_init($url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
if(is_array($header) && count($header) !== 0) {
Debug::log('Setting headers: ' . json_encode($header));
curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
}
curl_setopt($ch, CURLOPT_USERAGENT, Configuration::getConfig('http', 'useragent'));
curl_setopt($ch, CURLOPT_ENCODING, '');
curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
if(is_array($opts) && count($opts) !== 0) {
Debug::log('Setting options: ' . json_encode($opts));
foreach($opts as $key => $value) {
curl_setopt($ch, $key, $value);
}
}
if(defined('PROXY_URL') && !defined('NOPROXY')) {
Debug::log('Setting proxy url: ' . PROXY_URL);
curl_setopt($ch, CURLOPT_PROXY, PROXY_URL);
}
// We always want the response header as part of the data!
curl_setopt($ch, CURLOPT_HEADER, true);
// Build "If-Modified-Since" header
if(!Debug::isEnabled() && $time = $cache->getTime()) { // Skip if cache file doesn't exist
Debug::log('Adding If-Modified-Since');
curl_setopt($ch, CURLOPT_TIMEVALUE, $time);
curl_setopt($ch, CURLOPT_TIMECONDITION, CURL_TIMECOND_IFMODSINCE);
}
// Enables logging for the outgoing header
curl_setopt($ch, CURLINFO_HEADER_OUT, true);
$data = curl_exec($ch);
$errorCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
$curlError = curl_error($ch);
$curlErrno = curl_errno($ch);
$curlInfo = curl_getinfo($ch);
Debug::log('Outgoing header: ' . json_encode($curlInfo));
if($data === false)
Debug::log('Cant\'t download ' . $url . ' cUrl error: ' . $curlError . ' (' . $curlErrno . ')');
$headerSize = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
$header = substr($data, 0, $headerSize);
$retVal['header'] = $header;
Debug::log('Response header: ' . $header);
$headers = parseResponseHeader($header);
$finalHeader = end($headers);
curl_close($ch);
}
curl_setopt($ch, CURLOPT_USERAGENT, Configuration::getConfig('http', 'useragent'));
curl_setopt($ch, CURLOPT_TIMEOUT, Configuration::getConfig('http', 'timeout'));
curl_setopt($ch, CURLOPT_ENCODING, '');
curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
if(is_array($opts) && count($opts) !== 0) {
Debug::log('Setting options: ' . json_encode($opts));
foreach($opts as $key => $value) {
curl_setopt($ch, $key, $value);
}
}
if(defined('PROXY_URL') && !defined('NOPROXY')) {
Debug::log('Setting proxy url: ' . PROXY_URL);
curl_setopt($ch, CURLOPT_PROXY, PROXY_URL);
}
// We always want the response header as part of the data!
curl_setopt($ch, CURLOPT_HEADER, true);
// Build "If-Modified-Since" header
if(!Debug::isEnabled() && $time = $cache->getTime()) { // Skip if cache file doesn't exist
Debug::log('Adding If-Modified-Since');
curl_setopt($ch, CURLOPT_TIMEVALUE, $time);
curl_setopt($ch, CURLOPT_TIMECONDITION, CURL_TIMECOND_IFMODSINCE);
}
// Enables logging for the outgoing header
curl_setopt($ch, CURLINFO_HEADER_OUT, true);
$data = curl_exec($ch);
$errorCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
$curlError = curl_error($ch);
$curlErrno = curl_errno($ch);
$curlInfo = curl_getinfo($ch);
Debug::log('Outgoing header: ' . json_encode($curlInfo));
if($data === false)
Debug::log('Cant\'t download ' . $url . ' cUrl error: ' . $curlError . ' (' . $curlErrno . ')');
$headerSize = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
$header = substr($data, 0, $headerSize);
$retVal['header'] = $header;
Debug::log('Response header: ' . $header);
$headers = parseResponseHeader($header);
$finalHeader = end($headers);
curl_close($ch);
$finalHeader = array_change_key_case($finalHeader, CASE_LOWER);
switch($errorCode) {