1
0
mirror of https://github.com/RSS-Bridge/rss-bridge.git synced 2025-08-16 13:34:11 +02:00

Compare commits

..

36 Commits

Author SHA1 Message Date
Dag
4da61b7922 chore: prepare 2025-01-26 release (#4424) 2025-01-26 11:16:35 +01:00
burrow335
8b1ba003a8 Add support for custom feeds in posts (#4413) 2025-01-25 18:46:12 +01:00
Bartosz Sosna
230edf602e Add lfc.pl bridge (#4419)
* Add lfc.pl bridge

* Adjust bridge

* Add comments section
2025-01-25 18:43:27 +01:00
Eugene Molotov
bd7d1734c3 [RutubeBridge] Use publication time instead of creation time (#4417)
Publication time is shown in video page itself, so it is more essential
2025-01-25 18:40:13 +01:00
Dag
dd8bc077ed feat(FeedParser): recursively parse rss modules (#4422)
Also stop excluding the media module

fix #4415
2025-01-25 18:29:01 +01:00
SebLaus
952a2d99a3 Beginning of URL not needed anymore: ErrorMessage: cURL error Could not resolve host: www.bundestag.dehttps: 6 (https://curl.haxx.se/libcurl/c/libcurl-errors.html) for https://www.bundestag.dehttps://www.bundestag.de/parlament/praesidium/parteienfinanzierung/fundstellen50000/2025/2025-inhalt-1032412 (#4420) 2025-01-25 18:28:36 +01:00
Dag
58b3cfb158 fix: drop extension requirement in feed icon url, fix #4416 (#4421) 2025-01-25 17:43:03 +01:00
Eugene Molotov
028acd0af1 [VkBridge] Unassign maintainer (#4418) 2025-01-25 17:27:36 +01:00
axor-mst
2a58f82bd8 [Formula1Bridge] API key and URL format update (#4412)
* [Formula1Bridge] API key and URL format update

* [WorldCosplayBridge] Bridge removal
2025-01-20 17:32:41 +01:00
Simon Alberny
5214581386 Fix MondeDiplo empty date (#4407) 2025-01-15 20:50:56 +01:00
Sebastian Wolf
eadea242a7 [FragDenStaatBridge] remove bridge, site provides full feed at fragdenstaat.de/artikel/feed/ (#4405) 2025-01-12 17:03:27 +01:00
Pavel Korytov
1a2c1f5bba [OllamaBridge] Add bridge (#4403)
* [OllamaBridge] Add bridge

* [OllamaBridge] Fix typo
2025-01-10 20:28:58 +01:00
vdbhb59
776a1f47f3 Update 06_Public_Hosts.md (#4401)
Updated my hosting provider & country to reflect the correct details.
2025-01-10 13:08:35 +01:00
Tone
39ecd63f72 [GolemBridge.php] changed cookie (#4399)
the cookie value changed, without the new cookie it's not possible to parse the articles
2025-01-07 23:40:55 +01:00
Pavel Korytov
0e2655fc8a [AnthropicBridge] Add Anthropic Bridge (#4398)
* [AnthropicBridge] Add Anthropic Bridge

* [AnthropicBridge] Fix lint
2025-01-06 19:10:12 +01:00
Pavel Korytov
e355276378 [EconomistWorldInBriefBridge] Update bridge (#4397)
* [EconomistWorldInBriefBridge] Fix and update bridge

* [EconomistWorldInBriefBridge] Fix lint
2025-01-06 19:08:08 +01:00
Dag
cb65125dbd feat: add section link to frontpage bridge card (#4396) 2025-01-04 20:34:36 +01:00
Dag
1d02214e12 feat: extract simple_html_dom max_file_size to config (#4395) 2025-01-04 19:43:48 +01:00
Dag
48cb7d71ed feat(telegram): add pagination fetching of messages (#4394)
* feat(telegram): add pagination fetching of messages

* docs
2025-01-04 19:00:26 +01:00
Dag
f9e9c8101e Fix 257 (#4393)
* fix(tldrtech): trim duplicate leading slashes

* fix
2025-01-03 08:41:55 +01:00
Dag
97f7df0d06 feat(feedmerge): remove duplicates based off of title too (#4392) 2025-01-03 08:17:47 +01:00
Dag
db3899f2e6 fix(legifrance): emergency repair, still semi-broken (#4391) 2025-01-03 07:23:13 +01:00
Dag
d36cd0a332 fix(ceska): item image (#4390) 2025-01-03 07:11:08 +01:00
Dag
662e0bfa95 refactor(donnons) (#4389) 2025-01-03 06:49:10 +01:00
Dag
3fc38c15a3 fix: cache 400 and 404, and refactor token auth (#4388)
* fix(cache): also cache 400 and 404 responses

* refactor(token_auth)
2025-01-03 06:19:24 +01:00
Dag
be51ba17df fix(url): disallowed wonky path (#4386) 2025-01-03 05:40:30 +01:00
Dag
c44a76ff17 refactor: remove dead code (#4385) 2025-01-03 05:04:49 +01:00
Dag
7c6d4a932c fix: upgrade hardcoded version number, fix #4382 (#4384) 2025-01-03 01:58:38 +01:00
Sebastian Wolf
45ee018a6e [MixologyBridge] add null checks for author and timestamp elements (#4383)
* [MixologyBridge] add null checks for author and timestamp elements

* [MixologyBridge] fix formatting
2025-01-03 01:43:39 +01:00
Dag
e825272987 fix(rumble): exterminate double leading slashes in item url (#4381)
Fixed for items with pub date newer than 31. jan 2025
2025-01-02 18:22:47 +01:00
Niehztog
97eebfb562 [BlizzardNewsBridge] fix BlizzardNewsBridge (#4379)
* fix BlizzardNewsBridge

* fix linter warnings

* fix linter warnings

* fix linter warnings
2025-01-02 17:44:36 +01:00
mruac
2a44a006b2 Update BlueskyBridge.php (#4367)
* Update BlueskyBridge.php

* Used human readable terms
* Include quote and reply post
* Added video support
* Replaced Youtube embed with thumbnail preview
* Added link embed preview
* Included visible alt text to images

* appease the lint

* remove unused test code

* fix unset displayName

* appease the lint
2025-01-02 17:39:07 +01:00
Sebastian Wolf
974f00cd6a [MixologyBridge] adapt to latest site changes (#4368)
* [MixologyBridge] adapt to latest site changes

* [MixologyBridge] fix category selector
2025-01-02 17:17:54 +01:00
Quentin B.
4b4d622333 [CentreFranceBridge] Update parser to handle latest website layout changes (#4372) 2025-01-02 17:14:10 +01:00
Florent V.
b4a63e7040 [EdfPrices Bridge] add HC/HP, base and EJP (#4369)
* [EdfPrices Bridge] add HC/HP, base and EJP

* [EdfPrices Bridge] lint

* [EdfPrices Bridge] fix missing variable
2025-01-02 16:45:33 +01:00
Dag
7d544f1fab feat(reddit): support video (#4380) 2025-01-02 16:33:56 +01:00
80 changed files with 1656 additions and 779 deletions

View File

@@ -15,7 +15,7 @@
* [Astalaseven](https://github.com/Astalaseven)
* [Astyan-42](https://github.com/Astyan-42)
* [austinhuang0131](https://github.com/austinhuang0131)
* [AxorPL](https://github.com/AxorPL)
* [axor-mst](https://github.com/axor-mst)
* [ayacoo](https://github.com/ayacoo)
* [az5he6ch](https://github.com/az5he6ch)
* [b1nj](https://github.com/b1nj)

View File

@@ -29,7 +29,7 @@ Requires minimum PHP 7.4.
|![Screenshot #3](/static/screenshot-3.png?raw=true)|![Screenshot #4](/static/screenshot-4.png?raw=true)|
|![Screenshot #5](/static/screenshot-5.png?raw=true)|![Screenshot #6](/static/screenshot-6.png?raw=true)|
## A subset of bridges (16/447)
## A subset of bridges (15/447)
* `CssSelectorBridge`: [Scrape out a feed using CSS selectors](https://rss-bridge.org/bridge01/#bridge-CssSelectorBridge)
* `FeedMergeBridge`: [Combine multiple feeds into one](https://rss-bridge.org/bridge01/#bridge-FeedMergeBridge)
@@ -44,7 +44,6 @@ Requires minimum PHP 7.4.
* `ThePirateBayBridge:` [Fetches torrents by search/user/category](https://rss-bridge.org/bridge01/#bridge-ThePirateBayBridge)
* `TikTokBridge`: [Fetches posts by username](https://rss-bridge.org/bridge01/#bridge-TikTokBridge)
* `TwitchBridge`: [Fetches videos from channel](https://rss-bridge.org/bridge01/#bridge-TwitchBridge)
* `VkBridge`: [Fetches posts from user/group](https://rss-bridge.org/bridge01/#bridge-VkBridge)
* `XPathBridge`: [Scrape out a feed using XPath expressions](https://rss-bridge.org/bridge01/#bridge-XPathBridge)
* `YoutubeBridge`: [Fetches videos by username/channel/playlist/search](https://rss-bridge.org/bridge01/#bridge-YoutubeBridge)
* `YouTubeCommunityTabBridge`: [Fetches posts from a channel's community tab](https://rss-bridge.org/bridge01/#bridge-YouTubeCommunityTabBridge)
@@ -72,27 +71,27 @@ useradd --shell /bin/bash --create-home rss-bridge
cd /var/www
# Create folder and change ownership
# Create folder and change its ownership to rss-bridge
mkdir rss-bridge && chown rss-bridge:rss-bridge rss-bridge/
# Become user
# Become rss-bridge
su rss-bridge
# Fetch latest master
# Clone master branch into existing folder
git clone https://github.com/RSS-Bridge/rss-bridge.git rss-bridge/
cd rss-bridge
# Copy over the default config
# Copy over the default config (OPTIONAL)
cp -v config.default.ini.php config.ini.php
# Give full permissions only to owner (rss-bridge)
chmod 700 -R ./
# Recursively give full permissions to user/owner
chmod 700 --recursive ./
# Give read and execute to others (nginx and php-fpm)
# Give read and execute to others on folders ./ and ./static
chmod o+rx ./ ./static
# Give read to others (nginx)
chmod o+r -R ./static
# Recursively give read to others on folder ./static
chmod o+r --recursive ./static
```
Nginx config:
@@ -110,17 +109,14 @@ server {
error_log /var/log/nginx/rss-bridge.error.log;
log_not_found off;
# Intentionally not setting a root folder here
# autoindex is off by default but feels good to explicitly turn off
autoindex off;
# Intentionally not setting a root folder
# Static content only served here
location /static/ {
alias /var/www/rss-bridge/static/;
}
# Pass off to php-fpm when location is exactly /
# Pass off to php-fpm only when location is EXACTLY == /
location = / {
root /var/www/rss-bridge/;
include snippets/fastcgi-php.conf;
@@ -128,12 +124,12 @@ server {
fastcgi_pass unix:/run/php/rss-bridge.sock;
}
# Reduce spam
# Reduce log noise
location = /favicon.ico {
access_log off;
}
# Reduce spam
# Reduce log noise
location = /robots.txt {
access_log off;
}

View File

@@ -23,7 +23,7 @@ class DisplayAction implements ActionInterface
$noproxy = $request->get('_noproxy');
if (!$bridgeName) {
return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'Missing bridge parameter']), 400);
return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'Missing bridge name parameter']), 400);
}
$bridgeClassName = $this->bridgeFactory->createBridgeClassName($bridgeName);
if (!$bridgeClassName) {

View File

@@ -12,7 +12,7 @@ final class FrontpageAction implements ActionInterface
public function __invoke(Request $request): Response
{
$token = $request->attribute('token');
$token = $request->getAttribute('token');
$messages = [];
$activeBridges = 0;

View File

@@ -32,8 +32,7 @@ class AirBreizhBridge extends BridgeAbstract
public function collectData()
{
$html = '';
$html = getSimpleHTMLDOM(static::URI . 'publications/?fwp_publications_thematiques=' . $this->getInput('theme'))
or returnClientError('No results for this query.');
$html = getSimpleHTMLDOM(static::URI . 'publications/?fwp_publications_thematiques=' . $this->getInput('theme'));
foreach ($html->find('article') as $article) {
$item = [];

View File

@@ -146,7 +146,7 @@ EOT;
{
$uri = $this->getURI();
return getSimpleHTMLDOM($uri) ?: returnServerError('Could not request Amazon.');
return getSimpleHTMLDOM($uri);
}
private function scrapePriceFromMetrics($html)

147
bridges/AnthropicBridge.php Normal file
View File

@@ -0,0 +1,147 @@
<?php
/**
 * Bridge that lists the research publications shown on anthropic.com/research.
 *
 * The listing is not scraped from rendered HTML: it is read from the JSON
 * payloads that the page passes to `self.__next_f.push(...)` inside <script>
 * tags. Each publication page is then fetched to build the item content.
 */
class AnthropicBridge extends BridgeAbstract
{
    const MAINTAINER = 'sqrtminusone';
    const NAME = 'Anthropic Research Bridge';
    const URI = 'https://www.anthropic.com';
    const CACHE_TIMEOUT = 3600; // 1 hour
    const DESCRIPTION = 'Returns research publications from Anthropic';
    const PARAMETERS = [
        '' => [
            'limit' => [
                'name' => 'Limit',
                'type' => 'number',
                'required' => true,
                'defaultValue' => 10
            ],
        ]
    ];

    /**
     * Collects at most `limit` publications and fetches the content of each.
     *
     * @throws \Exception when the research page cannot be parsed or no longer
     *                    embeds the expected page data.
     */
    public function collectData()
    {
        // Anthropic sometimes returns 500 for no reason. The contents are still there.
        $html = $this->getHTMLIgnoreError(self::URI . '/research');
        if (!$html) {
            throw new \Exception('Unable to parse the Anthropic research page');
        }

        $page_data = $this->extractPageData($html);
        if ($page_data === null) {
            // Fail loudly instead of silently producing an empty feed.
            throw new \Exception('Unable to find the page data in the Anthropic research page');
        }

        // A non-positive limit yields no items, as the original counting loop did.
        $limit = max(0, (int) $this->getInput('limit'));
        $pages = array_slice($this->parsePageData($page_data), 0, $limit);

        foreach ($pages as $page) {
            $page['content'] = $this->parsePage($page['uri']);
            $this->items[] = $page;
        }
    }

    /**
     * Downloads a URL and parses it, keeping the response body even when the
     * request fails with an HTTP error.
     *
     * @param string   $url URL to download.
     * @param int|null $ttl When not null, the raw body is read from / written to
     *                      the bridge cache under a key derived from the URL,
     *                      with this value passed as the cache TTL.
     * @return mixed Whatever str_get_html() returns for the body: a DOM object,
     *               or a falsy value when the body could not be parsed.
     */
    private function getHTMLIgnoreError($url, $ttl = null)
    {
        $useCache = $ttl !== null;
        $cacheKey = 'pages_' . $url;

        if ($useCache) {
            $cached = $this->cache->get($cacheKey);
            if ($cached) {
                return str_get_html($cached);
            }
        }

        try {
            $content = getContents($url);
        } catch (HttpException $e) {
            // The error response still carries the page body.
            $content = $e->response->getBody();
        }

        if ($useCache) {
            $this->cache->set($cacheKey, $content, $ttl);
        }
        return str_get_html($content);
    }

    /**
     * Finds the embedded payload that describes the page.
     *
     * Every <script> whose code starts with `self.__next_f.push(` is inspected.
     * Its argument is decoded as a JSON string; the part of that string starting
     * at the first `[` is decoded again as a JSON array, and the fourth element
     * is returned when it is an array containing a `page` key.
     *
     * @param object $html Parsed research page.
     * @return array|null The page data, or null when no script matches.
     */
    private function extractPageData($html)
    {
        foreach ($html->find('script') as $script) {
            $js_code = $script->innertext;
            if (!str_starts_with($js_code, 'self.__next_f.push(')) {
                continue;
            }

            // Skip the first 22 characters (the 19-character prefix plus three
            // more, presumably `[1,`) and drop the last two (presumably `])`),
            // which should leave a single JSON string literal.
            $push_data = json_decode(mb_substr($js_code, 22, mb_strlen($js_code) - 2 - 22));
            if (!is_string($push_data)) {
                continue;
            }

            $square_bracket = mb_strpos($push_data, '[');
            if ($square_bracket === false) {
                continue;
            }

            $push_array = json_decode(mb_substr($push_data, $square_bracket), true);
            if (!is_array($push_array) || count($push_array) < 4) {
                continue;
            }

            $page_data = $push_array[3] ?? null;
            if (is_array($page_data) && array_key_exists('page', $page_data)) {
                return $page_data;
            }
        }

        return null;
    }

    /**
     * Converts the page data into a list of partial feed items.
     *
     * Only the first section named "Research Teams" is considered, within it
     * only the first tab labelled "Overview", and within that only the first
     * section titled "Publications". Missing keys are treated as empty.
     *
     * @param array $page_data Page data as returned by extractPageData().
     * @return array List of items with title, timestamp, uri, categories and
     *               enclosures (content is added later).
     */
    private function parsePageData($page_data)
    {
        $result = [];

        foreach ($page_data['page']['sections'] ?? [] as $section) {
            if (($section['internalName'] ?? null) !== 'Research Teams') {
                continue;
            }

            foreach ($section['tabPages'] ?? [] as $tabPage) {
                if (($tabPage['label'] ?? null) !== 'Overview') {
                    continue;
                }

                foreach ($tabPage['sections'] ?? [] as $section1) {
                    if (($section1['title'] ?? null) !== 'Publications') {
                        continue;
                    }

                    foreach ($section1['posts'] ?? [] as $post) {
                        if (!isset($post['slug']['current'])) {
                            // Without a slug there is no page to link to or fetch.
                            continue;
                        }

                        $enc = [];
                        if (isset($post['cta']['url'])) {
                            $enc = [$post['cta']['url']];
                        }

                        $result[] = [
                            'title' => $post['title'] ?? '',
                            'timestamp' => $post['publishedOn'] ?? null,
                            'uri' => self::URI . '/research/' . $post['slug']['current'],
                            'categories' => array_map(
                                fn($s) => $s['label'],
                                $post['subjects'] ?? []
                            ),
                            'enclosures' => $enc,
                        ];
                    }
                    // Only the first "Publications" section is used.
                    break;
                }
                // Only the first "Overview" tab is used.
                break;
            }
            // Only the first "Research Teams" section is used.
            break;
        }

        return $result;
    }

    /**
     * Builds the HTML content of one publication.
     *
     * The publication page is cached for seven days. Iframes inside the article
     * are replaced by plain links to their source.
     *
     * @param string $url Publication page URL.
     * @return string Article HTML, or an empty string when the page cannot be
     *                parsed or the article element is not found.
     */
    private function parsePage($url)
    {
        // Again, 500 for no reason.
        $html = $this->getHTMLIgnoreError($url, 7 * 24 * 60 * 60);
        if (!$html) {
            return '';
        }

        // Main content
        $main = $html->find('div[class*="PostDetail_post-detail"] > article', 0);
        if (!$main) {
            return '';
        }

        // Mostly YouTube videos
        foreach ($main->find('iframe') as $iframe) {
            $iframe->parent->removeAttribute('style');
            $iframe->outertext = '<a href="' . $iframe->src . '">' . $iframe->src . '</a>';
        }

        $main = convertLazyLoading($main);
        $main = defaultLinkTo($main, self::URI);

        return (string) $main;
    }
}

View File

@@ -105,8 +105,7 @@ class AssociatedPressNewsBridge extends BridgeAbstract
private function collectCardData()
{
$json = getContents($this->getTagURI())
or returnServerError('Could not request: ' . $this->getTagURI());
$json = getContents($this->getTagURI());
$tagContents = json_decode($json, true);

View File

@@ -29,7 +29,7 @@ class BAEBridge extends BridgeAbstract
public function collectData()
{
$url = $this->getURI();
$html = getSimpleHTMLDOM($url) or returnClientError('No results for this query.');
$html = getSimpleHTMLDOM($url);
$annonces = $html->find('main article');
foreach ($annonces as $annonce) {

View File

@@ -93,8 +93,7 @@ class BandcampDailyBridge extends BridgeAbstract
public function collectData()
{
$html = getSimpleHTMLDOM($this->getURI())
or returnServerError('Could not request: ' . $this->getURI());
$html = getSimpleHTMLDOM($this->getURI());
$html = defaultLinkTo($html, self::URI);
@@ -105,8 +104,7 @@ class BandcampDailyBridge extends BridgeAbstract
$articlePath = $article->find('a.title', 0)->href;
$articlePageHtml = getSimpleHTMLDOMCached($articlePath, 3600)
or returnServerError('Could not request: ' . $articlePath);
$articlePageHtml = getSimpleHTMLDOMCached($articlePath, 3600);
$item['uri'] = $articlePath;
$item['title'] = $articlePageHtml->find('article-title', 0)->innertext;

View File

@@ -1,6 +1,6 @@
<?php
class BlizzardNewsBridge extends XPathAbstract
class BlizzardNewsBridge extends BridgeAbstract
{
const NAME = 'Blizzard News';
const URI = 'https://news.blizzard.com';
@@ -35,33 +35,73 @@ class BlizzardNewsBridge extends XPathAbstract
];
const CACHE_TIMEOUT = 3600;
const XPATH_EXPRESSION_ITEM = '/html/body/div/div[4]/div[2]/div[2]/div/div/section/ol/li/article';
const XPATH_EXPRESSION_ITEM_TITLE = './/div/div[2]/h2';
const XPATH_EXPRESSION_ITEM_CONTENT = './/div[@class="ArticleListItem-description"]/div[@class="h6"]/text()';
const XPATH_EXPRESSION_ITEM_URI = './/a[@class="ArticleLink ArticleLink"]/@href';
const XPATH_EXPRESSION_ITEM_AUTHOR = '';
const XPATH_EXPRESSION_ITEM_TIMESTAMP = './/time[@class="ArticleListItem-footerTimestamp"]/@timestamp';
const XPATH_EXPRESSION_ITEM_ENCLOSURES = './/div[@class="ArticleListItem-image"]/@style';
const XPATH_EXPRESSION_ITEM_CATEGORIES = './/div[@class="ArticleListItem-label"]';
const SETTING_FIX_ENCODING = true;
private const PRODUCT_IDS = [
'blt525c436e4a1b0a97',
'blt54fbd3787a705054',
'blt2031aef34200656d',
'blt795c314400d7ded9',
'blt5cfc6affa3ca0638',
'blt2e50e1521bb84dc6',
'blt376fb94931906b6f',
'blt81d46fcb05ab8811',
'bltede2389c0a8885aa',
'blt24859ba8086fb294',
'blte27d02816a8ff3e1',
'blt2caca37e42f19839',
'blt90855744d00cd378',
'bltec70ad0ea4fd6d1d',
'blt500c1f8b5470bfdb'
];
private const API_PATH = '/api/news/blizzard?';
/**
* Source Web page URL (should provide either HTML or XML content)
* @return string
*/
protected function getSourceUrl()
private function getSourceUrl(): string
{
$locale = $this->getInput('locale');
if ('zh-cn' === $locale) {
return 'https://cn.news.blizzard.com';
$baseUrl = 'https://cn.news.blizzard.com' . self::API_PATH;
} else {
$baseUrl = 'https://news.blizzard.com/' . $locale . self::API_PATH;
}
return 'https://news.blizzard.com/' . $locale;
return $baseUrl .= http_build_query([
'feedCxpProductIds' => self::PRODUCT_IDS
]);
}
/**
 * Downloads the JSON news feed and appends one feed item per content entry.
 *
 * Text fields are passed through filterChars() before being stored.
 */
public function collectData()
{
    $payload = getContents($this->getSourceUrl());
    $decoded = json_decode($payload, true);

    foreach ($decoded['feed']['contentItems'] as $contentItem) {
        $props = $contentItem['properties'];

        $this->items[] = [
            'title' => $this->filterChars($props['title']),
            'content' => $this->filterChars($props['summary']),
            'uri' => $props['newsUrl'],
            'author' => $this->filterChars($props['author']),
            'timestamp' => strtotime($props['lastUpdated']),
            'enclosures' => [$props['staticAsset']['imageUrl']],
            'categories' => [$this->filterChars($props['cxpProduct']['title'])],
        ];
    }
}
/**
 * Converts special characters in a text value to entities.
 *
 * @param string $content Text to escape.
 * @return string Result of htmlspecialchars() called with the ENT_XML1 flag only.
 */
private function filterChars($content)
{
    $escaped = htmlspecialchars($content, ENT_XML1);

    return $escaped;
}
public function getIcon()
{
return <<<icon
https://blznews.akamaized.net/images/favicon-cb34a003c6f2f637ee8f4f7b406f3b9b120b918c04cabec7f03a760e708977ea9689a1c638f4396def8dce7b202cd007eae91946cc3c4a578aa8b5694226cfc6.ico
https://dfbmfbnnydoln.cloudfront.net/production/images/favicons/favicon.ba01bb119359d74970b02902472fd82e96b5aba7.ico
icon;
}
}

View File

@@ -2,10 +2,12 @@
class BlueskyBridge extends BridgeAbstract
{
const NAME = 'Bluesky';
//Initial PR by [RSSBridge contributors](https://github.com/RSS-Bridge/rss-bridge/issues/4058).
//Modified from [©DIYgod and contributors at RSSHub](https://github.com/DIYgod/RSSHub/tree/master/lib/routes/bsky), MIT License';
const NAME = 'Bluesky Bridge';
const URI = 'https://bsky.app';
const DESCRIPTION = 'Fetches posts from Bluesky';
const MAINTAINER = 'Code modified from rsshub (TonyRL https://github.com/TonyRL) and expanded';
const MAINTAINER = 'mruac';
const PARAMETERS = [
[
'data_source' => [
@@ -17,24 +19,39 @@ class BlueskyBridge extends BridgeAbstract
],
'title' => 'Select the type of data source to fetch from Bluesky.'
],
'handle' => [
'name' => 'User Handle',
'user_id' => [
'name' => 'User Handle or DID',
'type' => 'text',
'required' => true,
'exampleValue' => 'jackdodo.bsky.social',
'title' => 'Handle found in URL'
'exampleValue' => 'did:plc:z72i7hdynmk6r22z27h6tvur',
'title' => 'ATProto / Bsky.app handle or DID'
],
'filter' => [
'name' => 'Filter',
'feed_filter' => [
'name' => 'Feed type',
'type' => 'list',
'defaultValue' => 'posts_and_author_threads',
'values' => [
'posts_and_author_threads' => 'posts_and_author_threads',
'posts_with_replies' => 'posts_with_replies',
'posts_no_replies' => 'posts_no_replies',
'posts_with_media' => 'posts_with_media',
],
'title' => 'Combinations of post/repost types to include in response.'
'Posts feed' => 'posts_and_author_threads',
'All posts and replies' => 'posts_with_replies',
'Root posts only' => 'posts_no_replies',
'Media only' => 'posts_with_media',
]
],
'include_reposts' => [
'name' => 'Include Reposts?',
'type' => 'checkbox',
'defaultValue' => 'checked'
],
'include_reply_context' => [
'name' => 'Include Reply context?',
'type' => 'checkbox'
],
'verbose_title' => [
'name' => 'Use verbose feed item titles?',
'type' => 'checkbox'
]
]
];
@@ -44,7 +61,11 @@ class BlueskyBridge extends BridgeAbstract
public function getName()
{
if (isset($this->profile)) {
return sprintf('%s (@%s) - Bluesky', $this->profile['displayName'], $this->profile['handle']);
if ($this->profile['handle'] === 'handle.invalid') {
return sprintf('Bluesky - %s', $this->profile['displayName']);
} else {
return sprintf('Bluesky - %s (@%s)', $this->profile['displayName'], $this->profile['handle']);
}
}
return parent::getName();
}
@@ -52,7 +73,11 @@ class BlueskyBridge extends BridgeAbstract
public function getURI()
{
if (isset($this->profile)) {
return self::URI . '/profile/' . $this->profile['handle'];
if ($this->profile['handle'] === 'handle.invalid') {
return self::URI . '/profile/' . $this->profile['did'];
} else {
return self::URI . '/profile/' . $this->profile['handle'];
}
}
return parent::getURI();
}
@@ -77,118 +102,373 @@ class BlueskyBridge extends BridgeAbstract
{
$description = '';
$externalUri = $external['uri'];
$externalTitle = htmlspecialchars($external['title'], ENT_QUOTES, 'UTF-8');
$externalDescription = htmlspecialchars($external['description'], ENT_QUOTES, 'UTF-8');
$externalTitle = e($external['title']);
$externalDescription = e($external['description']);
$thumb = $external['thumb'] ?? null;
if (preg_match('/youtube\.com\/watch\?v=([^\&\?\/]+)/', $externalUri, $id) || preg_match('/youtu\.be\/([^\&\?\/]+)/', $externalUri, $id)) {
$videoId = $id[1];
$description .= "<p>External Link: <a href=\"$externalUri\">$externalTitle</a></p>";
$description .= "<iframe width=\"560\" height=\"315\" src=\"https://www.youtube.com/embed/$videoId\" frameborder=\"0\" allowfullscreen></iframe>";
if (preg_match('/http(|s):\/\/media\.tenor\.com/', $externalUri)) {
//tenor gif embed
$tenorInterstitial = str_replace('media.tenor.com', 'media1.tenor.com/m', $externalUri);
$description .= "<figure><a href=\"$tenorInterstitial\"><img src=\"$externalUri\"/></a><figcaption>$externalTitle</figcaption></figure>";
} else {
$description .= "<p>External Link: <a href=\"$externalUri\">$externalTitle</a></p>";
$description .= "<p>$externalDescription</p>";
if ($thumb) {
$thumbUrl = 'https://cdn.bsky.app/img/feed_thumbnail/plain/' . $did . '/' . $thumb['ref']['$link'] . '@jpeg';
$description .= "<p><a href=\"$externalUri\"><img src=\"$thumbUrl\" alt=\"External Thumbnail\" /></a></p>";
}
//link embed preview
$host = parse_url($externalUri)['host'];
$thumbDesc = $thumb ? ('<img src="https://cdn.bsky.app/img/feed_thumbnail/plain/' . $did . '/' . $thumb['ref']['$link'] . '@jpeg"/>') : '';
$externalDescription = strlen($externalDescription) > 0 ? "<figcaption>($host) $externalDescription</figcaption>" : '';
$description .= '<br><blockquote><b><a href="' . $externalUri . '">' . $externalTitle . '</a></b>';
$description .= '<figure>' . $thumbDesc . $externalDescription . '</figure></blockquote>';
}
return $description;
}
private function textToDescription($text)
private function textToDescription($record)
{
$text = nl2br(htmlspecialchars($text, ENT_QUOTES, 'UTF-8'));
$text = preg_replace('/(https?:\/\/[^\s]+)/i', '<a href="$1">$1</a>', $text);
if (isset($record['value'])) {
$record = $record['value'];
}
$text = $record['text'];
$text_copy = $text;
$text = nl2br(e($text));
if (isset($record['facets'])) {
$facets = $record['facets'];
foreach ($facets as $facet) {
if ($facet['features'][0]['$type'] === 'app.bsky.richtext.facet#link') {
$substring = substr($text_copy, $facet['index']['byteStart'], $facet['index']['byteEnd'] - $facet['index']['byteStart']);
$text = str_replace($substring, '<a href="' . $facet['features'][0]['uri'] . '">' . $substring . '</a>', $text);
}
}
}
return $text;
}
public function collectData()
{
$handle = $this->getInput('handle');
$filter = $this->getInput('filter') ?: 'posts_and_author_threads';
$user_id = $this->getInput('user_id');
$handle_match = preg_match('/(?:[a-zA-Z]*\.)+([a-zA-Z](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)/', $user_id, $handle_res); //gets the TLD in $handle_match[1]
$did_match = preg_match('/did:plc:[a-z2-7]{24}/', $user_id); //https://github.com/did-method-plc/did-method-plc#identifier-syntax
$exclude = ['alt', 'arpa', 'example', 'internal', 'invalid', 'local', 'localhost', 'onion']; //https://en.wikipedia.org/wiki/Top-level_domain#Reserved_domains
if ($handle_match == true && array_search($handle_res[1], $exclude) == false) {
//valid bsky handle
$did = $this->resolveHandle($user_id);
} elseif ($did_match == true) {
//valid DID
$did = $user_id;
} else {
returnClientError('Invalid ATproto handle or DID provided.');
}
$filter = $this->getInput('feed_filter') ?: 'posts_and_author_threads';
$replyContext = $this->getInput('include_reply_context');
$did = $this->resolveHandle($handle);
$this->profile = $this->getProfile($did);
$authorFeed = $this->getAuthorFeed($did, $filter);
foreach ($authorFeed['feed'] as $post) {
$postRecord = $post['post']['record'];
$item = [];
$item['uri'] = self::URI . '/profile/' . $post['post']['author']['handle'] . '/post/' . explode('app.bsky.feed.post/', $post['post']['uri'])[1];
$item['title'] = strtok($post['post']['record']['text'], "\n");
$item['timestamp'] = strtotime($post['post']['record']['createdAt']);
$item['author'] = $this->profile['displayName'];
$item['uri'] = self::URI . '/profile/' . $this->fallbackAuthor($post['post']['author'], 'url') . '/post/' . explode('app.bsky.feed.post/', $post['post']['uri'])[1];
$item['title'] = $this->getInput('verbose_title') ? $this->generateVerboseTitle($post) : strtok($postRecord['text'], "\n");
$item['timestamp'] = strtotime($postRecord['createdAt']);
$item['author'] = $this->fallbackAuthor($post['post']['author'], 'display');
$description = $this->textToDescription($post['post']['record']['text']);
$postAuthorDID = $post['post']['author']['did'];
$postAuthorHandle = $post['post']['author']['handle'] !== 'handle.invalid' ? '<i>@' . $post['post']['author']['handle'] . '</i> ' : '';
$postDisplayName = $post['post']['author']['displayName'] ?? '';
$postDisplayName = e($postDisplayName);
$postUri = $item['uri'];
// Retrieve DID for constructing image URLs
$authorDid = $post['post']['author']['did'];
if (isset($post['post']['record']['embed']['$type']) && $post['post']['record']['embed']['$type'] === 'app.bsky.embed.external') {
$description .= $this->parseExternal($post['post']['record']['embed']['external'], $authorDid);
if (Debug::isEnabled()) {
$url = explode('/', $post['post']['uri']);
error_log('https://bsky.app/profile/' . $url[2] . '/post/' . $url[4]);
}
if (isset($post['post']['record']['embed']['$type']) && $post['post']['record']['embed']['$type'] === 'app.bsky.embed.video') {
$thumbnail = $post['post']['embed']['thumbnail'] ?? null;
if ($thumbnail) {
$itemUri = self::URI . '/profile/' . $post['post']['author']['handle'] . '/post/' . explode('app.bsky.feed.post/', $post['post']['uri'])[1];
$description .= "<p><a href=\"$itemUri\"><img src=\"$thumbnail\" alt=\"Video Thumbnail\" /></a></p>";
$description = '';
$description .= '<p>';
//post
$description .= $this->getPostDescription(
$postDisplayName,
$postAuthorHandle,
$postUri,
$postRecord,
'post'
);
if (isset($postRecord['embed']['$type'])) {
//post link embed
if ($postRecord['embed']['$type'] === 'app.bsky.embed.external') {
$description .= $this->parseExternal($postRecord['embed']['external'], $postAuthorDID);
} elseif (
$postRecord['embed']['$type'] === 'app.bsky.embed.recordWithMedia' &&
$postRecord['embed']['media']['$type'] === 'app.bsky.embed.external'
) {
$description .= $this->parseExternal($postRecord['embed']['media']['external'], $postAuthorDID);
}
//post images
if (
$postRecord['embed']['$type'] === 'app.bsky.embed.images' ||
(
$postRecord['embed']['$type'] === 'app.bsky.embed.recordWithMedia' &&
$postRecord['embed']['media']['$type'] === 'app.bsky.embed.images'
)
) {
$images = $post['post']['embed']['images'] ?? $post['post']['embed']['media']['images'];
foreach ($images as $image) {
$description .= $this->getPostImageDescription($image);
}
}
//post video
if (
$postRecord['embed']['$type'] === 'app.bsky.embed.video' ||
(
$postRecord['embed']['$type'] === 'app.bsky.embed.recordWithMedia' &&
$postRecord['embed']['media']['$type'] === 'app.bsky.embed.video'
)
) {
$description .= $this->getPostVideoDescription(
$postRecord['embed']['video'] ?? $postRecord['embed']['media']['video'],
$postAuthorDID
);
}
}
$description .= '</p>';
if (isset($post['post']['record']['embed']['$type']) && $post['post']['record']['embed']['$type'] === 'app.bsky.embed.recordWithMedia#view') {
$thumbnail = $post['post']['embed']['media']['thumbnail'] ?? null;
$playlist = $post['post']['embed']['media']['playlist'] ?? null;
if ($thumbnail) {
$description .= "<p><video controls poster=\"$thumbnail\">";
$description .= "<source src=\"$playlist\" type=\"application/x-mpegURL\">";
$description .= 'Video source not supported</video></p>';
}
}
//quote post
if (
isset($postRecord['embed']) &&
(
$postRecord['embed']['$type'] === 'app.bsky.embed.record' ||
$postRecord['embed']['$type'] === 'app.bsky.embed.recordWithMedia'
) &&
isset($post['post']['embed']['record'])
) {
$description .= '<p>';
$quotedRecord = $post['post']['embed']['record']['record'] ?? $post['post']['embed']['record'];
if (!empty($post['post']['record']['embed']['images'])) {
foreach ($post['post']['record']['embed']['images'] as $image) {
$linkRef = $image['image']['ref']['$link'];
$thumbnailUrl = $this->resolveThumbnailUrl($authorDid, $linkRef);
$fullsizeUrl = $this->resolveFullsizeUrl($authorDid, $linkRef);
$description .= "<br /><br /><a href=\"$fullsizeUrl\"><img src=\"$thumbnailUrl\" alt=\"Image\"></a>";
}
}
if (isset($quotedRecord['notFound']) && $quotedRecord['notFound']) { //deleted post
$description .= 'Quoted post deleted.';
} elseif (isset($quotedRecord['detached']) && $quotedRecord['detached']) { //detached quote
$uri_explode = explode('/', $quotedRecord['uri']);
$uri_reconstructed = self::URI . '/profile/' . $uri_explode[2] . '/post/' . $uri_explode[4];
$description .= '<a href="' . $uri_reconstructed . '">Quoted post detached.</a>';
} elseif (isset($quotedRecord['blocked']) && $quotedRecord['blocked']) { //blocked by quote author
$description .= 'Author of quoted post has blocked OP.';
} elseif (($quotedRecord['$type'] ?? '') === 'app.bsky.feed.defs#generatorView') {
$description .= '</p>';
$description .= $this->getGeneratorViewDescription($quotedRecord);
$description .= '<p>';
} else {
$quotedAuthorDid = $quotedRecord['author']['did'];
$quotedDisplayName = $quotedRecord['author']['displayName'] ?? '';
$quotedDisplayName = e($quotedDisplayName);
$quotedAuthorHandle = $quotedRecord['author']['handle'] !== 'handle.invalid' ? '<i>@' . $quotedRecord['author']['handle'] . '</i>' : '';
// Enhanced handling for quote posts with images
if (isset($post['post']['record']['embed']) && $post['post']['record']['embed']['$type'] === 'app.bsky.embed.record') {
$quotedRecord = $post['post']['record']['embed']['record'];
$quotedAuthor = $post['post']['embed']['record']['author']['handle'] ?? null;
$quotedDisplayName = $post['post']['embed']['record']['author']['displayName'] ?? null;
$quotedText = $post['post']['embed']['record']['value']['text'] ?? null;
if ($quotedAuthor && isset($quotedRecord['uri'])) {
$parts = explode('/', $quotedRecord['uri']);
$quotedPostId = end($parts);
$quotedPostUri = self::URI . '/profile/' . $quotedAuthor . '/post/' . $quotedPostId;
}
$quotedPostUri = self::URI . '/profile/' . $this->fallbackAuthor($quotedRecord['author'], 'url') . '/post/' . $quotedPostId;
if ($quotedText) {
$description .= '<hr /><strong>Quote from ' . htmlspecialchars($quotedDisplayName) . ' (@ ' . htmlspecialchars($quotedAuthor) . '):</strong><br />';
$description .= $this->textToDescription($quotedText);
if (isset($quotedPostUri)) {
$description .= "<p><a href=\"$quotedPostUri\">View original quote post</a></p>";
//quoted post - post
$description .= $this->getPostDescription(
$quotedDisplayName,
$quotedAuthorHandle,
$quotedPostUri,
$quotedRecord,
'quote'
);
if (isset($quotedRecord['value']['embed']['$type'])) {
//quoted post - post link embed
if ($quotedRecord['value']['embed']['$type'] === 'app.bsky.embed.external') {
$description .= $this->parseExternal($quotedRecord['value']['embed']['external'], $quotedAuthorDid);
}
//quoted post - post video
if (
$quotedRecord['value']['embed']['$type'] === 'app.bsky.embed.video' ||
(
$quotedRecord['value']['embed']['$type'] === 'app.bsky.embed.recordWithMedia' &&
$quotedRecord['value']['embed']['media']['$type'] === 'app.bsky.embed.video'
)
) {
$description .= $this->getPostVideoDescription(
$quotedRecord['value']['embed']['video'] ?? $quotedRecord['value']['embed']['media']['video'],
$quotedAuthorDid
);
}
//quoted post - post images
if (
$quotedRecord['value']['embed']['$type'] === 'app.bsky.embed.images' ||
(
$quotedRecord['value']['embed']['$type'] === 'app.bsky.embed.recordWithMedia' &&
$quotedRecord['value']['embed']['media']['$type'] === 'app.bsky.embed.images'
)
) {
foreach ($quotedRecord['embeds'] as $embed) {
if (
$embed['$type'] === 'app.bsky.embed.images#view' ||
($embed['$type'] === 'app.bsky.embed.recordWithMedia#view' && $embed['media']['$type'] === 'app.bsky.embed.images#view')
) {
$images = $embed['images'] ?? $embed['media']['images'];
foreach ($images as $image) {
$description .= $this->getPostImageDescription($image);
}
}
}
}
}
}
$description .= '</p>';
}
if (isset($post['post']['embed']['record']['value']['embed']['images'])) {
$quotedImages = $post['post']['embed']['record']['value']['embed']['images'];
foreach ($quotedImages as $image) {
$linkRef = $image['image']['ref']['$link'] ?? null;
if ($linkRef) {
$quotedAuthorDid = $post['post']['embed']['record']['author']['did'] ?? null;
$thumbnailUrl = $this->resolveThumbnailUrl($quotedAuthorDid, $linkRef);
$fullsizeUrl = $this->resolveFullsizeUrl($quotedAuthorDid, $linkRef);
$description .= "<br /><br /><a href=\"$fullsizeUrl\"><img src=\"$thumbnailUrl\" alt=\"Quoted Image\"></a>";
//reply
if ($replyContext && isset($post['reply']) && !isset($post['reply']['parent']['notFound'])) {
$replyPost = $post['reply']['parent'];
$replyPostRecord = $replyPost['record'];
$description .= '<hr/>';
$description .= '<p>';
$replyPostAuthorDID = $replyPost['author']['did'];
$replyPostAuthorHandle = $replyPost['author']['handle'] !== 'handle.invalid' ? '<i>@' . $replyPost['author']['handle'] . '</i> ' : '';
$replyPostDisplayName = $replyPost['author']['displayName'] ?? '';
$replyPostDisplayName = e($replyPostDisplayName);
$replyPostUri = self::URI . '/profile/' . $this->fallbackAuthor($replyPost['author'], 'url') . '/post/' . explode('app.bsky.feed.post/', $replyPost['uri'])[1];
// reply post
$description .= $this->getPostDescription(
$replyPostDisplayName,
$replyPostAuthorHandle,
$replyPostUri,
$replyPostRecord,
'reply'
);
if (isset($replyPostRecord['embed']['$type'])) {
//post link embed
if ($replyPostRecord['embed']['$type'] === 'app.bsky.embed.external') {
$description .= $this->parseExternal($replyPostRecord['embed']['external'], $replyPostAuthorDID);
} elseif (
$replyPostRecord['embed']['$type'] === 'app.bsky.embed.recordWithMedia' &&
$replyPostRecord['embed']['media']['$type'] === 'app.bsky.embed.external'
) {
$description .= $this->parseExternal($replyPostRecord['embed']['media']['external'], $replyPostAuthorDID);
}
//post images
if (
$replyPostRecord['embed']['$type'] === 'app.bsky.embed.images' ||
(
$replyPostRecord['embed']['$type'] === 'app.bsky.embed.recordWithMedia' &&
$replyPostRecord['embed']['media']['$type'] === 'app.bsky.embed.images'
)
) {
$images = $replyPost['embed']['images'] ?? $replyPost['embed']['media']['images'];
foreach ($images as $image) {
$description .= $this->getPostImageDescription($image);
}
}
//post video
if (
$replyPostRecord['embed']['$type'] === 'app.bsky.embed.video' ||
(
$replyPostRecord['embed']['$type'] === 'app.bsky.embed.recordWithMedia' &&
$replyPostRecord['embed']['media']['$type'] === 'app.bsky.embed.video'
)
) {
$description .= $this->getPostVideoDescription(
$replyPostRecord['embed']['video'] ?? $replyPostRecord['embed']['media']['video'],
$replyPostAuthorDID
);
}
}
$description .= '</p>';
//quote post
if (
isset($replyPostRecord['embed']) &&
($replyPostRecord['embed']['$type'] === 'app.bsky.embed.record' || $replyPostRecord['embed']['$type'] === 'app.bsky.embed.recordWithMedia') &&
isset($replyPost['embed']['record'])
) {
$description .= '<p>';
$replyQuotedRecord = $replyPost['embed']['record']['record'] ?? $replyPost['embed']['record'];
if (isset($replyQuotedRecord['notFound']) && $replyQuotedRecord['notFound']) { //deleted post
$description .= 'Quoted post deleted.';
} elseif (isset($replyQuotedRecord['detached']) && $replyQuotedRecord['detached']) { //detached quote
$uri_explode = explode('/', $replyQuotedRecord['uri']);
$uri_reconstructed = self::URI . '/profile/' . $uri_explode[2] . '/post/' . $uri_explode[4];
$description .= '<a href="' . $uri_reconstructed . '">Quoted post detached.</a>';
} elseif (isset($replyQuotedRecord['blocked']) && $replyQuotedRecord['blocked']) { //blocked by quote author
$description .= 'Author of quoted post has blocked OP.';
} elseif (($replyQuotedRecord['$type'] ?? '') === 'app.bsky.feed.defs#generatorView') {
$description .= '</p>';
$description .= $this->getGeneratorViewDescription($replyQuotedRecord);
$description .= '<p>';
} else {
$quotedAuthorDid = $replyQuotedRecord['author']['did'];
$quotedDisplayName = $replyQuotedRecord['author']['displayName'] ?? '';
$quotedDisplayName = e($quotedDisplayName);
$quotedAuthorHandle = $replyQuotedRecord['author']['handle'] !== 'handle.invalid' ? '<i>@' . $replyQuotedRecord['author']['handle'] . '</i>' : '';
$parts = explode('/', $replyQuotedRecord['uri']);
$quotedPostId = end($parts);
$quotedPostUri = self::URI . '/profile/' . $this->fallbackAuthor($replyQuotedRecord['author'], 'url') . '/post/' . $quotedPostId;
//quoted post - post
$description .= $this->getPostDescription(
$quotedDisplayName,
$quotedAuthorHandle,
$quotedPostUri,
$replyQuotedRecord,
'quote'
);
if (isset($replyQuotedRecord['value']['embed']['$type'])) {
//quoted post - post link embed
if ($replyQuotedRecord['value']['embed']['$type'] === 'app.bsky.embed.external') {
$description .= $this->parseExternal($replyQuotedRecord['value']['embed']['external'], $quotedAuthorDid);
}
//quoted post - post video
if (
$replyQuotedRecord['value']['embed']['$type'] === 'app.bsky.embed.video' ||
(
$replyQuotedRecord['value']['embed']['$type'] === 'app.bsky.embed.recordWithMedia' &&
$replyQuotedRecord['value']['embed']['media']['$type'] === 'app.bsky.embed.video'
)
) {
$description .= $this->getPostVideoDescription(
$replyQuotedRecord['value']['embed']['video'] ?? $replyQuotedRecord['value']['embed']['media']['video'],
$quotedAuthorDid
);
}
//quoted post - post images
if (
$replyQuotedRecord['value']['embed']['$type'] === 'app.bsky.embed.images' ||
(
$replyQuotedRecord['value']['embed']['$type'] === 'app.bsky.embed.recordWithMedia' &&
$replyQuotedRecord['value']['embed']['media']['$type'] === 'app.bsky.embed.images'
)
) {
foreach ($replyQuotedRecord['embeds'] as $embed) {
if (
$embed['$type'] === 'app.bsky.embed.images#view' ||
($embed['$type'] === 'app.bsky.embed.recordWithMedia#view' && $embed['media']['$type'] === 'app.bsky.embed.images#view')
) {
$images = $embed['images'] ?? $embed['media']['images'];
foreach ($images as $image) {
$description .= $this->getPostImageDescription($image);
}
}
}
}
}
}
$description .= '</p>';
}
}
@@ -197,6 +477,98 @@ class BlueskyBridge extends BridgeAbstract
}
}
/**
 * Build the HTML <figure> for a video attached to a post.
 *
 * The poster image is taken from video.bsky.app and the video source is the
 * raw blob fetched through com.atproto.sync.getBlob.
 *
 * @param array  $video     Video blob descriptor; reads $video['ref']['$link'] and $video['mimeType'].
 * @param string $authorDID DID of the account that owns the blob.
 * @return string HTML markup for the video.
 */
private function getPostVideoDescription(array $video, $authorDID)
{
    $videoCID = $video['ref']['$link'];
    $videoMime = $video['mimeType'];

    // Thumbnail URL scheme: https://video.bsky.app/watch/$did/$cid/thumbnail.jpg
    // (the former `?? ''` on this literal could never trigger and was dropped).
    $thumbnail = "poster=\"https://video.bsky.app/watch/{$authorDID}/{$videoCID}/thumbnail.jpg\"";
    $videoURL = "https://bsky.social/xrpc/com.atproto.sync.getBlob?did={$authorDID}&cid={$videoCID}";

    return "<figure><video loop {$thumbnail} controls src=\"{$videoURL}\" type=\"{$videoMime}\"/></figure>";
}
/**
 * Build the HTML <figure> for one image of a post.
 *
 * The thumbnail is shown inline and links to the full-size image; a
 * <figcaption> is appended only when the image carries non-empty alt text.
 *
 * @param array $image Image view; reads the 'thumb', 'fullsize' and 'alt' keys.
 * @return string HTML markup for the image.
 */
private function getPostImageDescription(array $image)
{
    $thumb = $image['thumb'];
    $full = $image['fullsize'];

    $caption = '';
    if (strlen($image['alt']) > 0) {
        // alt text is escaped before being emitted
        $caption = '<figcaption>' . e($image['alt']) . '</figcaption>';
    }

    return "<figure><a href=\"$full\"><img src=\"$thumb\"></a>$caption</figure>";
}
/**
 * Render the header line ("X posted:", "Replying to X's post:", "Quoted post
 * from X:") followed by the post text.
 *
 * @param string $postDisplayName  Display name, inserted as-is inside <b>.
 * @param string $postAuthorHandle Pre-formatted handle markup (may be empty).
 * @param string $postUri          Link target for the post.
 * @param array  $postRecord       Record passed on to textToDescription(); a
 *                                 'reply' key makes the header say "reply" instead of "post".
 * @param string $type             'quote', 'reply', or anything else for a plain post.
 * @return string HTML fragment.
 */
private function getPostDescription(
    string $postDisplayName,
    string $postAuthorHandle,
    string $postUri,
    array $postRecord,
    string $type
) {
    // Only the quote/reply headers use this wording.
    $postType = isset($postRecord['reply']) ? 'reply' : 'post';

    if ($type === 'quote') {
        // Quoted post/reply from bbb @bbb.com:
        $header = "<a href=\"$postUri\">Quoted $postType</a> from <b>$postDisplayName</b> $postAuthorHandle:<br>";
    } elseif ($type === 'reply') {
        // Replying to aaa @aaa.com's post/reply:
        $header = "Replying to <b>$postDisplayName</b> $postAuthorHandle's <a href=\"$postUri\">$postType</a>:<br>";
    } else {
        // aaa @aaa.com posted:
        $header = "<b>$postDisplayName</b> $postAuthorHandle <a href=\"$postUri\">posted</a>:<br>";
    }

    return $header . $this->textToDescription($postRecord);
}
/**
 * Pick an identifier for an author, with a fallback for unverifiable handles.
 *
 * When the handle is the literal 'handle.invalid', the DID is returned for
 * $reason 'url' and the escaped display name for $reason 'display'. In every
 * other case the handle itself is returned.
 *
 * @param array  $author Author data; reads 'handle', 'did' and optionally 'displayName'.
 * @param string $reason Context the value is needed for ('url' or 'display').
 * @return string
 */
private function fallbackAuthor($author, $reason)
{
    $handle = $author['handle'];
    if ($handle !== 'handle.invalid') {
        return $handle;
    }

    // Loose comparison on purpose: mirrors switch/case matching semantics.
    if ($reason == 'url') {
        return $author['did'];
    }
    if ($reason == 'display') {
        return e($author['displayName'] ?? '');
    }

    // Unknown reason: nothing better than the handle is available.
    return $handle;
}
/**
 * Build a title of the form "Post by A, replying to B, quoting C" (or
 * "Repost by X, post by A, ...") instead of using the post contents.
 *
 * @param array $post Feed item; reads 'reason', 'post' and 'reply'.
 * @return string
 */
private function generateVerboseTitle($post)
{
    $postAuthor = $this->fallbackAuthor($post['post']['author'], 'display');

    if (isset($post['reason']) && str_contains($post['reason']['$type'], 'reasonRepost')) {
        $title = 'Repost by ' . $this->fallbackAuthor($post['reason']['by'], 'display') . ', post by ' . $postAuthor;
    } else {
        $title = 'Post by ' . $postAuthor;
    }

    if (isset($post['reply'])) {
        $parent = $post['reply']['parent'];
        if (isset($parent['blocked'])) {
            $replyAuthor = 'blocked user';
        } elseif (isset($parent['notFound'])) {
            $replyAuthor = 'deleted post';
        } else {
            $replyAuthor = $this->fallbackAuthor($parent['author'], 'display');
        }
        $title .= ', replying to ' . $replyAuthor;
    }

    if (isset($post['post']['embed']['record'])) {
        $embedRecord = $post['post']['embed']['record'];
        // A record-with-media embed nests the quoted record one level deeper;
        // unwrap it first so the blocked/notFound/detached markers are seen
        // there too (same unwrapping as done when rendering reply quotes).
        $quotedRecord = $embedRecord['record'] ?? $embedRecord;

        if (isset($quotedRecord['blocked'])) {
            $quotedAuthor = 'blocked user';
        } elseif (isset($quotedRecord['notFound'])) {
            $quotedAuthor = 'deleted post';
        } elseif (isset($quotedRecord['detached'])) {
            $quotedAuthor = 'detached post';
        } else {
            $quotedAuthor = $this->fallbackAuthor($quotedRecord['author'] ?? $embedRecord['author'], 'display');
        }
        $title .= ', quoting ' . $quotedAuthor;
    }

    return $title;
}
private function resolveHandle($handle)
{
$uri = 'https://public.api.bsky.app/xrpc/com.atproto.identity.resolveHandle?handle=' . urlencode($handle);
@@ -214,17 +586,35 @@ class BlueskyBridge extends BridgeAbstract
/**
 * Fetch an author's feed from the public Bluesky API (app.bsky.feed.getAuthorFeed).
 *
 * @param string $did    Actor identifier, sent as the `actor` query parameter.
 * @param string $filter Value for the `filter` query parameter.
 * @return mixed JSON response decoded into associative arrays.
 */
private function getAuthorFeed($did, $filter)
{
    // At most 30 entries are requested per call.
    $uri = 'https://public.api.bsky.app/xrpc/app.bsky.feed.getAuthorFeed?actor=' . urlencode($did)
        . '&filter=' . urlencode($filter)
        . '&limit=30';

    // Log the request URL when debug mode is on.
    if (Debug::isEnabled()) {
        error_log($uri);
    }

    return json_decode(getContents($uri), true);
}
private function resolveThumbnailUrl($authorDid, $linkRef)
private function getGeneratorViewDescription(array $record): string
{
return 'https://cdn.bsky.app/img/feed_thumbnail/plain/' . $authorDid . '/' . $linkRef . '@jpeg';
}
$avatar = e($record['avatar']);
$displayName = e($record['displayName']);
$displayHandle = e($record['creator']['handle']);
$likeCount = e($record['likeCount']);
preg_match('/\/([^\/]+)$/', $record['uri'], $matches);
$uri = e('https://bsky.app/profile/' . $record['creator']['did'] . '/feed/' . $matches[1]);
private function resolveFullsizeUrl($authorDid, $linkRef)
{
return 'https://cdn.bsky.app/img/feed_fullsize/plain/' . $authorDid . '/' . $linkRef . '@jpeg';
return <<<END
<a href="{$uri}" style="color: inherit;">
<div style="border: 1px solid #333; padding: 10px;">
<div style="display: flex; margin-bottom: 10px;">
<img src="{$avatar}" height="50" width="50" style="margin-right: 10px;">
<div style="display: flex; flex-direction: column; justify-content: center;">
<h3>{$displayName}</h3>
<span>Feed by @{$displayHandle}</span>
</div>
</div>
<span>Liked by {$likeCount} users</span>
</div>
</a>
END;
}
}

View File

@@ -26,18 +26,16 @@ TMPL;
https://www.bundestag.de/ajax/filterlist/de/parlament/praesidium/parteienfinanzierung/fundstellen50000/462002-462002
URI;
// Get the main page
$html = getSimpleHTMLDOMCached($ajaxUri, self::CACHE_TIMEOUT)
or returnServerError('Could not request AJAX list.');
$html = getSimpleHTMLDOMCached($ajaxUri, self::CACHE_TIMEOUT);
// Build the URL from the first anchor element. The list is sorted by year, descending, so the first element is the current year.
$firstAnchor = $html->find('a', 0)
or returnServerError('Could not find the proper HTML element.');
$url = 'https://www.bundestag.de' . $firstAnchor->href;
$url = $firstAnchor->href;
// Get the actual page with the soft money donations
$html = getSimpleHTMLDOMCached($url, self::CACHE_TIMEOUT)
or returnServerError('Could not request ' . $url);
$html = getSimpleHTMLDOMCached($url, self::CACHE_TIMEOUT);
$rows = $html->find('table.table > tbody > tr')
or returnServerError('Could not find the proper HTML elements.');

View File

@@ -48,6 +48,11 @@ class CentreFranceBridge extends BridgeAbstract
]
];
/**
 * Lowercase French month names mapped to their month number (1-12).
 *
 * Used to translate the month word captured from a "Publié le ..." date
 * string into the numeric month passed to DateTime::setDate().
 */
private static array $monthNumberByFrenchName = [
'janvier' => 1, 'février' => 2, 'mars' => 3, 'avril' => 4, 'mai' => 5, 'juin' => 6, 'juillet' => 7,
'août' => 8, 'septembre' => 9, 'octobre' => 10, 'novembre' => 11, 'décembre' => 12
];
public function collectData()
{
$value = $this->getInput('limit');
@@ -130,14 +135,22 @@ class CentreFranceBridge extends BridgeAbstract
'enclosures' => [],
];
$articleInformations = $html->find('.c-article-informations p');
$articleInformations = $html->find('#content hgroup > div.typo-p3 > *');
if (is_array($articleInformations) && $articleInformations !== []) {
$authorPosition = 1;
$publicationDateIndex = 0;
// Article author
$probableAuthorName = strip_tags($articleInformations[0]->innertext);
if (str_starts_with($probableAuthorName, 'Par ')) {
$publicationDateIndex = 1;
$item['author'] = substr($probableAuthorName, 4);
}
// Article publication date
if (preg_match('/(\d{2})\/(\d{2})\/(\d{4})( à (\d{2})h(\d{2}))?/', $articleInformations[0]->innertext, $articleDateParts) > 0) {
preg_match('/Publié le (\d{2}) (.+) (\d{4})( à (\d{2})h(\d{2}))?/', strip_tags($articleInformations[$publicationDateIndex]->innertext), $articleDateParts);
if ($articleDateParts !== [] && array_key_exists($articleDateParts[2], self::$monthNumberByFrenchName)) {
$articleDate = new \DateTime('midnight');
$articleDate->setDate($articleDateParts[3], $articleDateParts[2], $articleDateParts[1]);
$articleDate->setDate($articleDateParts[3], self::$monthNumberByFrenchName[$articleDateParts[2]], $articleDateParts[1]);
if (count($articleDateParts) === 7) {
$articleDate->setTime($articleDateParts[5], $articleDateParts[6]);
@@ -145,57 +158,31 @@ class CentreFranceBridge extends BridgeAbstract
$item['timestamp'] = $articleDate->getTimestamp();
}
// Article update date
if (count($articleInformations) >= 2 && preg_match('/(\d{2})\/(\d{2})\/(\d{4})( à (\d{2})h(\d{2}))?/', $articleInformations[1]->innertext, $articleDateParts) > 0) {
$authorPosition = 2;
$articleDate = new \DateTime('midnight');
$articleDate->setDate($articleDateParts[3], $articleDateParts[2], $articleDateParts[1]);
if (count($articleDateParts) === 7) {
$articleDate->setTime($articleDateParts[5], $articleDateParts[6]);
}
$item['timestamp'] = $articleDate->getTimestamp();
}
if (count($articleInformations) === ($authorPosition + 1)) {
$item['author'] = $articleInformations[$authorPosition]->innertext;
}
}
$articleContent = $html->find('.b-article .contenu > *');
if (is_array($articleContent)) {
$item['content'] = '';
foreach ($articleContent as $contentPart) {
if (in_array($contentPart->getAttribute('id'), ['cf-audio-player', 'poool-widget'], true)) {
continue;
$articleContent = $html->find('#content>div.flex+div.grid section>.z-10')[0] ?? null;
if ($articleContent instanceof \simple_html_dom_node) {
$articleHiddenParts = $articleContent->find('.ad-slot, #cf-digiteka-player');
if (is_array($articleHiddenParts)) {
foreach ($articleHiddenParts as $articleHiddenPart) {
$articleContent->removeChild($articleHiddenPart);
}
$articleHiddenParts = $contentPart->find('.bloc, .p402_hide');
if (is_array($articleHiddenParts)) {
foreach ($articleHiddenParts as $articleHiddenPart) {
$contentPart->removeChild($articleHiddenPart);
}
}
$item['content'] .= $contentPart->innertext;
}
$item['content'] = $articleContent->innertext;
}
$articleIllustration = $html->find('.photo-wrapper .photo-box img');
$articleIllustration = $html->find('#content>div.flex+div.grid section>figure>img');
if (is_array($articleIllustration) && count($articleIllustration) === 1) {
$item['enclosures'][] = $articleIllustration[0]->getAttribute('src');
}
$articleAudio = $html->find('#cf-audio-player-container audio');
$articleAudio = $html->find('audio[src^="https://api.octopus.saooti.com/"]');
if (is_array($articleAudio) && count($articleAudio) === 1) {
$item['enclosures'][] = $articleAudio[0]->getAttribute('src');
}
$articleTags = $html->find('.b-article > ul.c-tags > li > a.t-simple');
$articleTags = $html->find('#content>div.flex+div.grid section>.bg-gray-light>a.border-gray-dark');
if (is_array($articleTags)) {
$item['categories'] = array_map(static fn ($articleTag) => $articleTag->innertext, $articleTags);
}

View File

@@ -18,25 +18,6 @@ class CeskaTelevizeBridge extends BridgeAbstract
]
];
/**
 * Decode HTML entities (including single and double quote entities) in
 * $text to their UTF-8 characters.
 */
private function fixChars($text)
{
return html_entity_decode($text, ENT_QUOTES, 'UTF-8');
}
/**
 * Convert a broadcast-date label into a Unix timestamp.
 *
 * A label containing 'dnes' maps to today, one containing 'včera' maps to
 * yesterday; anything else is parsed as "day. month. [year]", with the
 * current year used when none is captured.
 */
private function getUploadTimeFromString($string)
{
if (strpos($string, 'dnes') !== false) {
return strtotime('today');
} elseif (strpos($string, 'včera') !== false) {
return strtotime('yesterday');
// NOTE(review): the dots in this pattern are unescaped, so they match any
// character, not only a literal '.'.
} elseif (!preg_match('/(\d+).\s(\d+).(\s(\d+))?/', $string, $match)) {
returnServerError('Could not get date from Česká televize string');
}
// NOTE(review): $match[3] is the optional group including its leading
// whitespace; the bare year digits are in $match[4].
$date = sprintf('%04d-%02d-%02d', $match[3] ?? date('Y'), $match[2], $match[1]);
return strtotime($date);
}
public function collectData()
{
$url = $this->getInput('url');
@@ -58,24 +39,38 @@ class CeskaTelevizeBridge extends BridgeAbstract
}
foreach ($html->find('#episodeListSection a[data-testid=card]') as $element) {
$itemTitle = $element->find('h3', 0);
$itemContent = $element->find('p[class^=content-]', 0);
$itemDate = $element->find('div[class^=playTime-] span, [data-testid=episode-item-broadcast] span', 0);
$itemThumbnail = $element->find('img', 0);
$itemUri = self::URI . $element->getAttribute('href');
$item = [
'title' => $this->fixChars($itemTitle->plaintext),
'uri' => $itemUri,
'content' => '<img src="' . $itemThumbnail->getAttribute('src') . '" /><br />'
. $this->fixChars($itemContent->plaintext),
'timestamp' => $this->getUploadTimeFromString($itemDate->plaintext)
'title' => $this->fixChars($element->find('h3', 0)->plaintext),
'uri' => self::URI . $element->getAttribute('href'),
'content' => '<img src="' . $element->find('img', 0)->getAttribute('srcset') . '" /><br />' . $this->fixChars($itemContent->plaintext),
'timestamp' => $this->getUploadTimeFromString($itemDate->plaintext),
];
$this->items[] = $item;
}
}
/**
 * Convert a broadcast-date label into a Unix timestamp.
 *
 * A label containing 'dnes' maps to today and one containing 'včera' to
 * yesterday. Anything else must contain a "day. month." date, optionally
 * followed by a four-digit year; the current year is assumed when the year
 * is missing.
 *
 * @param string $string Date label scraped from the episode card.
 * @return int|false Timestamp at midnight of the parsed day (strtotime() result).
 */
private function getUploadTimeFromString($string)
{
    if (strpos($string, 'dnes') !== false) {
        return strtotime('today');
    }
    if (strpos($string, 'včera') !== false) {
        return strtotime('yesterday');
    }

    // The dots are escaped so that only real "d. m." separators match;
    // unescaped, they matched any character and e.g. a leading "10:30 "
    // could be mistaken for the day/month part. The year is captured on its
    // own (without the preceding whitespace) and must be four digits.
    if (!preg_match('/(\d{1,2})\.\s*(\d{1,2})\.(?:\s*(\d{4}))?/', $string, $match)) {
        returnServerError('Could not get date from Česká televize string');
    }

    // $match[3] is absent when the label carries no year.
    $date = sprintf('%04d-%02d-%02d', $match[3] ?? date('Y'), $match[2], $match[1]);
    return strtotime($date);
}
/**
 * Turn HTML entities in $text (quotes included) back into UTF-8 characters.
 */
private function fixChars($text)
{
    $decoded = html_entity_decode($text, ENT_QUOTES, 'UTF-8');

    return $decoded;
}
public function getURI()
{
return $this->feedUri ?? parent::getURI();

View File

@@ -109,7 +109,7 @@ class CrewbayBridge extends BridgeAbstract
public function collectData()
{
$url = $this->getURI();
$html = getSimpleHTMLDOM($url) or returnClientError('No results for this query.');
$html = getSimpleHTMLDOM($url);
$annonces = $html->find('#SearchResults div.result');
$limit = 0;

View File

@@ -53,8 +53,7 @@ class DacksnackBridge extends BridgeAbstract
public function collectData()
{
$NEWSURL = self::URI;
$html = getSimpleHTMLDOMCached($NEWSURL, 18000) or
returnServerError('Could not request: ' . $NEWSURL);
$html = getSimpleHTMLDOMCached($NEWSURL, 18000);
foreach ($html->find('a.main-news-item') as $element) {
// Debug::log($element);
@@ -64,8 +63,7 @@ class DacksnackBridge extends BridgeAbstract
$url = self::URI . $element->getAttribute('href');
$published = $this->parseSwedishDates(trim($element->find('.published', 0)->plaintext));
$article_html = getSimpleHTMLDOMCached($url, 18000) or
returnServerError('Could not request: ' . $url);
$article_html = getSimpleHTMLDOMCached($url, 18000);
$article_content = $article_html->find('#ctl00_ContentPlaceHolder1_NewsArticleVeiw_pnlArticle', 0);
$figure = self::URI . $article_content->find('img.news-image', 0)->getAttribute('src');

View File

@@ -18,8 +18,7 @@ class DagensNyheterDirektBridge extends BridgeAbstract
{
$NEWSURL = self::BASEURL . '/ajax/direkt/';
$html = getSimpleHTMLDOM($NEWSURL) or
returnServerError('Could not request: ' . $NEWSURL);
$html = getSimpleHTMLDOM($NEWSURL);
foreach ($html->find('article') as $element) {
$link = $element->find('button', 0)->getAttribute('data-link');

View File

@@ -10,9 +10,11 @@ class DansTonChatBridge extends BridgeAbstract
public function collectData()
{
$html = getSimpleHTMLDOM(self::URI . 'latest.html');
$url = self::URI . 'latest.html';
$dom = getSimpleHTMLDOM($url);
foreach ($html->find('div.item') as $element) {
$items = $dom->find('div.item');
foreach ($items as $element) {
$item = [];
$item['uri'] = $element->find('a', 0)->href;
$titleContent = $element->find('h3 a', 0);

View File

@@ -1,5 +1,7 @@
<?php
declare(strict_types=1);
/**
* Retourne les dons d'une recherche filtrée sur le site Donnons.org
* Example: https://donnons.org/Sport/Ile-de-France
@@ -44,58 +46,60 @@ class DonnonsBridge extends BridgeAbstract
{
$uri = $this->getPageURI($page);
$html = getSimpleHTMLDOM($uri);
$dom = getSimpleHTMLDOM($uri);
$searchDiv = $html->find('div[id=search]', 0);
$searchDiv = $dom->find('div[id=search]', 0);
if (!is_null($searchDiv)) {
$elements = $searchDiv->find('a.lst-annonce');
foreach ($elements as $element) {
$item = [];
if (! $searchDiv) {
return;
}
// Lien vers le don
$item['uri'] = self::URI . $element->href;
// Id de l'objet
$item['uid'] = $element->getAttribute('data-id');
$elements = $searchDiv->find('a.lst-annonce');
foreach ($elements as $element) {
$item = [];
// Grab info from json
$jsonString = $element->find('script', 0)->innertext;
$json = json_decode($jsonString, true);
// Lien vers le don
$item['uri'] = self::URI . $element->href;
// Id de l'objet
$item['uid'] = $element->getAttribute('data-id');
$name = $json['name'];
$category = $json['category'];
$date = $json['availabilityStarts'];
$description = $json['description'];
$city = $json['availableAtOrFrom']['address']['addressLocality'];
$region = $json['availableAtOrFrom']['address']['addressRegion'];
// Grab info from json
$jsonString = $element->find('script', 0)->innertext;
$json = json_decode($jsonString, true);
// Grab info from HTML
$imageSrc = $element->find('img.ima-center', 0)->getAttribute('src');
// Use large image instead of small one
$imageSrc = str_replace('/xs/', '/lg/', $imageSrc);
$image = self::URI . $imageSrc;
$author = $element->find('div.avatar-holder', 0)->plaintext;
$name = $json['name'];
$category = $json['category'];
$date = $json['availabilityStarts'];
$description = $json['description'];
$city = $json['availableAtOrFrom']['address']['addressLocality'];
$region = $json['availableAtOrFrom']['address']['addressRegion'];
$content = '
<img style="margin-right:1em;" src="' . $image . '">
<div>
<h1>' . $name . '</h1>
<p>' . $description . '</p>
<p>Lieu : <b>' . $city . '</b> - ' . $region . '</p>
<p>Par : ' . $author . '</p>
<p>Date : ' . $date . '</p>
</div>
';
// Grab info from HTML
$imageSrc = $element->find('img.ima-center', 0)->getAttribute('src');
// Use large image instead of small one
$imageSrc = str_replace('/xs/', '/lg/', $imageSrc);
$image = self::URI . $imageSrc;
$author = $element->find('div.avatar-holder', 0)->plaintext;
// Titre du don
$item['title'] = '[' . $category . '] ' . $name;
$item['timestamp'] = $date;
$item['author'] = $author;
$item['content'] = $content;
$item['enclosures'] = [$image];
$content = '
<img style="margin-right:1em;" src="' . $image . '">
<div>
<h1>' . $name . '</h1>
<p>' . $description . '</p>
<p>Lieu : <b>' . $city . '</b> - ' . $region . '</p>
<p>Par : ' . $author . '</p>
<p>Date : ' . $date . '</p>
</div>
';
$this->items[] = $item;
}
// Titre du don
$item['title'] = '[' . $category . '] ' . $name;
$item['timestamp'] = $date;
$item['author'] = $author;
$item['content'] = $content;
$item['enclosures'] = [$image];
$this->items[] = $item;
}
}

View File

@@ -41,6 +41,12 @@ class EconomistWorldInBriefBridge extends BridgeAbstract
'quote' => [
'name' => 'Include the quote of the day',
'type' => 'checkbox'
],
'mergeEverything' => [
'name' => 'Merge everything into one entry',
'type' => 'checkbox',
'defaultValue' => false,
'title' => 'Whether to merge all the stories into one entry'
]
]
];
@@ -61,7 +67,7 @@ class EconomistWorldInBriefBridge extends BridgeAbstract
}
$html = getSimpleHTMLDOM(self::URI, $headers);
$gobbets = $html->find('p[data-component="the-world-in-brief-paragraph"]');
if ($this->getInput('splitGobbets') == 1) {
if ($this->getInput('splitGobbets') == 1 && !$this->getInput('mergeEverything')) {
$this->splitGobbets($gobbets);
} else {
$this->mergeGobbets($gobbets);
@@ -77,6 +83,9 @@ class EconomistWorldInBriefBridge extends BridgeAbstract
$quote = $html->find('blockquote[data-test-id="inspirational-quote"]', 0);
$this->addQuote($quote);
}
if ($this->getInput('mergeEverything') == 1) {
$this->mergeEverything();
}
}
private function splitGobbets($gobbets)
@@ -131,6 +140,9 @@ class EconomistWorldInBriefBridge extends BridgeAbstract
if ($element->tag != 'div') {
continue;
}
if ($element->find('._newsletterContentPromo', 0) != null) {
continue;
}
$image = $element->find('figure', 0);
$title = $element->find('h3', 0)->plaintext;
$content = $element->find('h3', 0)->parent();
@@ -165,4 +177,35 @@ class EconomistWorldInBriefBridge extends BridgeAbstract
'uid' => 'quote-' . $today->format('U')
];
}
/**
 * Collapse all collected items into a single feed entry.
 *
 * Each item's content is appended in order, preceded by an <h2> heading
 * chosen from its uid: the item title for 'story-' uids, a fixed label for
 * 'quote-' and 'world-in-brief-' uids, and no heading otherwise.
 * $this->items is then replaced by the one merged entry, dated today at
 * midnight.
 */
private function mergeEverything()
{
    $midnight = new DateTime();
    $midnight->setTime(0, 0, 0, 0);

    $merged = '';
    foreach ($this->items as $entry) {
        $uid = $entry['uid'];

        if (str_contains($uid, 'story-')) {
            $heading = $entry['title'];
        } elseif (str_contains($uid, 'quote-')) {
            $heading = 'Quote of the day';
        } elseif (str_contains($uid, 'world-in-brief-')) {
            $heading = 'World in brief';
        } else {
            $heading = null;
        }

        // Loose check: an empty title yields no heading either.
        if ($heading != null) {
            $merged .= "<h2>{$heading}</h2>";
        }
        $merged .= $entry['content'];
    }

    $epoch = $midnight->format('U');
    $this->items = [
        [
            'uri' => self::URI,
            'title' => 'The Economist World in Brief ' . $midnight->format('d.m.Y'),
            'content' => $merged,
            'timestamp' => $epoch,
            'uid' => 'world-in-brief-merged' . $epoch
        ]
    ];
}
}

View File

@@ -12,8 +12,28 @@ class EdfPricesBridge extends BridgeAbstract
'contract' => [
'name' => 'Choisir un contrat',
'type' => 'list',
// we can add later HCHP, EJP, base
'values' => ['Tempo' => '/energie/edf/tarifs/tempo'],
// we can add later more option prices
'values' => [
'Base' => '/energie/edf/tarifs/tarif-bleu#base',
'HPHC' => '/energie/edf/tarifs/tarif-bleu#hphc',
'EJP' => '/energie/edf/tarifs/tarif-bleu#ejp',
'Tempo' => '/energie/edf/tarifs/tempo'
],
],
'power' => [
'name' => 'Choisir une puissance',
'type' => 'list',
'values' => [
'3 kVA' => 3,
'6 kVA' => 6,
'9 kVA' => 9,
'12 kVA' => 12,
'15 kVA' => 15,
'18 kVA' => 18,
'24 kVA' => 24,
'30 kVA' => 30,
'36 kVA' => 36
]
]
]
];
@@ -24,36 +44,20 @@ class EdfPricesBridge extends BridgeAbstract
* @param string $contractUri
* @return void
*/
private function tempo(simple_html_dom $html, string $contractUri): void
private function tempo(simple_html_dom $html, string $contractUri, int $power): void
{
// current color and next
$daysDom = $html->find('#calendrier', 0)->nextSibling()->find('.card--ejp');
if ($daysDom && count($daysDom) === 2) {
foreach ($daysDom as $dayDom) {
$day = trim($dayDom->find('.card__title', 0)->innertext) . '/' . (new \DateTime('now'))->format(('Y'));
$dayColor = $dayDom->find('.card-ejp__icon span', 0)->innertext;
$text = $day . ' - ' . $dayColor;
$item['uri'] = self::URI . $contractUri;
$item['title'] = $text;
$item['author'] = self::MAINTAINER;
$item['content'] = $text;
$item['uid'] = hash('sha256', $item['title']);
$this->items[] = $item;
}
}
// colors
$ulDom = $html->find('#tarif-de-l-offre-tempo-edf-template-date-now-y', 0)->nextSibling()->nextSibling()->nextSibling();
$elementsDom = $ulDom->find('li');
if ($elementsDom && count($elementsDom) === 3) {
// price per kWh is same for all powers
foreach ($elementsDom as $elementDom) {
$item = [];
$matches = [];
preg_match_all('/Jour (.*) : Heures (.*) : (.*)&nbsp;€ \/ Heures (.*) : (.*)&nbsp;€/um', $elementDom->innertext, $matches, PREG_SET_ORDER, 0);
// for tempo contract we have 2x3 colors
if ($matches && count($matches[0]) === 6) {
for ($i = 0; $i < 2; $i++) {
$text = 'Jour ' . $matches[0][1] . ' - Heures ' . $matches[0][2 + 2 * $i] . ' : ' . $matches[0][3 + 2 * $i] . '€';
@@ -69,26 +73,166 @@ class EdfPricesBridge extends BridgeAbstract
}
}
// powers
$ulPowerContract = $ulDom->nextSibling()->nextSibling();
$elementsPowerContractDom = $ulPowerContract->find('li');
if ($elementsPowerContractDom && count($elementsPowerContractDom) === 4) {
foreach ($elementsPowerContractDom as $elementPowerContractDom) {
// add subscription power info
$tablePrices = $ulDom->nextSibling()->nextSibling()->nextSibling()->find('.table--responsive', 0);
$this->addSubscriptionPowerInfo($tablePrices, $contractUri, $power, 7);
}
/**
 * Collect the prices of the "Base" option.
 *
 * Adds one item with the kWh price (read from the first row of the price
 * table) and then delegates to addSubscriptionPowerInfo() for the
 * subscription entry of the requested power.
 *
 * @param simple_html_dom $html        Parsed tariff page.
 * @param string          $contractUri Contract path appended to self::URI for item links.
 * @param int             $power       Subscribed power, forwarded to addSubscriptionPowerInfo().
 * @return void
 */
private function base(simple_html_dom $html, string $contractUri, int $power): void
{
    // The price table is the third sibling after the section heading.
    $heading = $html->find('#grille-tarifaire-et-prix-du-kwh-du-tarif-reglemente-edf-en-option-base', 0);
    $tablePrices = $heading->nextSibling()->nextSibling()->nextSibling();

    $rows = $tablePrices->find('.table--stripped tbody tr');
    // last element is useless because part of another table
    array_pop($rows);

    // price per kWh is same for all powers, so the first row is enough
    if ($rows && count($rows) === 9) {
        $label = 'Base : ' . $rows[0]->children(2);
        $this->items[] = [
            'uri' => self::URI . $contractUri,
            'title' => $label,
            'author' => self::MAINTAINER,
            'content' => $label,
            'uid' => hash('sha256', $label),
        ];
    }

    $this->addSubscriptionPowerInfo($tablePrices, $contractUri, $power, 9);
}
/**
* @param simple_html_dom $html
* @param string $contractUri
* @return void
*/
private function hphc(simple_html_dom $html, string $contractUri, int $power): void
{
$tablePrices = $html
->find('#grille-tarifaire-et-prix-du-kwh-du-tarif-reglemente-edf-en-option-heures-pleines-heures-creuses', 0)
->nextSibling()
->nextSibling()
->nextSibling();
$prices = $tablePrices->find('.table--stripped tbody tr');
// last element is useless because part of another table
array_pop($prices);
// price per kWh is same for all powers
if ($prices && count($prices) === 8) {
$values = ['HC', 'HP'];
foreach ($values as $key => $value) {
$i++;
$item = [];
$matches = [];
preg_match_all('/(.*) kVA : (.*) €/um', $elementPowerContractDom->innertext, $matches, PREG_SET_ORDER, 0);
$text = $values[$key] . ' : ' . $prices[0]->children($key + 2);
$item['uri'] = self::URI . $contractUri;
$item['title'] = $text;
$item['author'] = self::MAINTAINER;
$item['content'] = $text;
$item['uid'] = hash('sha256', $item['title']);
if ($matches && count($matches[0]) === 3) {
$text = $matches[0][1] . ' kVA : ' . $matches[0][2] . '€';
$item['uri'] = self::URI . $contractUri;
$item['title'] = $text;
$item['author'] = self::MAINTAINER;
$item['content'] = $text;
$item['uid'] = hash('sha256', $item['title']);
$this->items[] = $item;
}
}
$this->items[] = $item;
$this->addSubscriptionPowerInfo($tablePrices, $contractUri, $power, 8);
}
/**
 * Collect the items of the "EJP" option: one item per kWh price ("Non EJP" and
 * "EJP") and, through addSubscriptionPowerInfo(), one item about the
 * subscription price.
 *
 * @param simple_html_dom $html parsed contract page
 * @param string $contractUri path appended to self::URI to build the item link
 * @param int $power subscribed power, forwarded to addSubscriptionPowerInfo()
 * @return void
 */
private function ejp(simple_html_dom $html, string $contractUri, int $power): void
{
    $tablePrices = $html
        ->find('#grille-tarifaire-et-prix-du-kwh-du-tarif-reglemente-edf-en-option-ejp', 0)
        ->nextSibling()
        ->nextSibling()
        ->nextSibling();
    $prices = $tablePrices->find('.table--stripped tbody tr');
    // last element is useless because part of another table
    array_pop($prices);

    // price per kWh is same for all powers
    if ($prices && count($prices) === 5) {
        // Each label is read from the first row: index 0 -> child 2, index 1 -> child 3
        $values = ['Non EJP', 'EJP'];
        foreach ($values as $key => $value) {
            // The former `$i++` was removed: $i is never defined in this method,
            // so it only raised an "undefined variable" warning.
            $text = $value . ' : ' . $prices[0]->children($key + 2);
            $this->items[] = [
                'uri' => self::URI . $contractUri,
                'title' => $text,
                'author' => self::MAINTAINER,
                'content' => $text,
                'uid' => hash('sha256', $text),
            ];
        }
    }

    $this->addSubscriptionPowerInfo($tablePrices, $contractUri, $power, 5);
}
/**
 * Append one item describing the yearly subscription price for the requested power.
 *
 * When no table row matches $power, a placeholder item saying so is appended
 * instead. Nothing is appended when the table does not contain exactly
 * $numberOfPrices rows.
 *
 * @param simple_html_dom_node $tablePrices node containing the price table
 * @param string $contractUri path appended to self::URI to build the item link
 * @param int $power subscribed power in kVA to look for
 * @param int $numberOfPrices expected number of rows for the contract
 * @return void
 */
private function addSubscriptionPowerInfo(simple_html_dom_node $tablePrices, string $contractUri, int $power, int $numberOfPrices): void
{
    $rows = $tablePrices->find('.table--stripped tbody tr');
    // The trailing row belongs to another table, so it is discarded
    array_pop($rows);

    // Expected row counts, as passed by the callers in this class:
    // 7 for tempo: 6, 9, 12, 15, 18, 30 and 36 kVA
    // 9 for base: 3, 6, 9, 12, 15, 18, 24, 30 and 36 kVA
    // 8 for HPHC: 6, 9, 12, 15, 18, 24, 30 and 36 kVA
    // 5 for EJP: 9, 12, 15, 18 and 36 kVA
    if (!$rows || count($rows) !== $numberOfPrices) {
        return;
    }

    // Fallback text, replaced as soon as a row for the requested power is found
    $text = 'Pas de tarif abonnement pour cette puissance et ce contrat';
    foreach ($rows as $row) {
        $powerText = $row->firstChild()->firstChild()->innertext;
        // Keep only what precedes " kVA" and compare it as an integer
        $rowPower = (int)substr($powerText, 0, strpos($powerText, ' kVA'));
        if ($rowPower === $power) {
            $text = $powerText . ' : ' . $row->children(1) . '/an';
            break;
        }
    }

    $this->items[] = [
        'uri' => self::URI . $contractUri,
        'title' => $text,
        'author' => self::MAINTAINER,
        'content' => $text,
        'uid' => hash('sha256', $text),
    ];
}
@@ -97,10 +241,23 @@ class EdfPricesBridge extends BridgeAbstract
{
$contract = $this->getKey('contract');
$contractUri = $this->getInput('contract');
$power = $this->getInput('power');
$html = getSimpleHTMLDOM(self::URI . $contractUri);
if ($contract === 'Tempo') {
$this->tempo($html, $contractUri);
$this->tempo($html, $contractUri, $power);
}
if ($contract === 'Base') {
$this->base($html, $contractUri, $power);
}
if ($contract === 'HPHC') {
$this->hphc($html, $contractUri, $power);
}
if ($contract === 'EJP') {
$this->ejp($html, $contractUri, $power);
}
}
}

View File

@@ -6,8 +6,10 @@ class FeedMergeBridge extends FeedExpander
const NAME = 'FeedMerge';
const URI = 'https://github.com/RSS-Bridge/rss-bridge';
const DESCRIPTION = <<<'TEXT'
This bridge merges two or more feeds into a single feed. Max 10 items are fetched from each feed.
TEXT;
This bridge merges two or more feeds into a single feed. <br>
Max 10 latest items are fetched from each individual feed. <br>
Items with identical url or title are considered duplicates (and are removed). <br>
TEXT;
const PARAMETERS = [
[
@@ -36,11 +38,11 @@ TEXT;
];
/**
* todo: Consider a strategy which produces a shorter feed url
* TODO: Consider a strategy which produces a shorter feed url
*/
public function collectData()
{
$limit = (int)($this->getInput('limit') ?: 10);
$limit = (int)($this->getInput('limit') ?: 99);
$feeds = [
$this->getInput('feed_1'),
$this->getInput('feed_2'),
@@ -61,7 +63,7 @@ TEXT;
if (count($feeds) > 1) {
// Allow one or more feeds to fail
try {
$this->collectExpandableDatas($feed);
$this->collectExpandableDatas($feed, 10);
} catch (HttpException $e) {
$this->logger->warning(sprintf('Exception in FeedMergeBridge: %s', create_sane_exception_message($e)));
// This feed item might be spammy. Considering dropping it.
@@ -80,31 +82,48 @@ TEXT;
throw $e;
}
} else {
$this->collectExpandableDatas($feed);
$this->collectExpandableDatas($feed, 10);
}
}
// If $this->items is empty we should consider throw exception here
// Sort by timestamp descending
// Sort by timestamp, uri, title in descending order
usort($this->items, function ($a, $b) {
$t1 = $a['timestamp'] ?? $a['uri'] ?? $a['title'];
$t2 = $b['timestamp'] ?? $b['uri'] ?? $b['title'];
return $t2 <=> $t1;
});
// Remove duplicates by using url as unique key
// Remove duplicates by url
$items = [];
foreach ($this->items as $item) {
$index = $item['uri'] ?? null;
if ($index) {
// Overwrite duplicates
$items[$index] = $item;
$uri = $item['uri'] ?? null;
if ($uri) {
// Insert or override the existing duplicate
$items[$uri] = $item;
} else {
// The item doesn't have a uri!
$items[] = $item;
}
}
$this->items = array_slice(array_values($items), 0, $limit);
$this->items = array_values($items);
// Remove duplicates by title
$items = [];
foreach ($this->items as $item) {
$title = $item['title'] ?? null;
if ($title) {
// Insert or override the existing duplicate
$items[$title] = $item;
} else {
// The item doesn't have a title!
$items[] = $item;
}
}
$this->items = array_values($items);
$this->items = array_slice($this->items, 0, $limit);
}
public function getIcon()

View File

@@ -60,7 +60,7 @@ class FindACrewBridge extends BridgeAbstract
CURLOPT_POSTFIELDS => http_build_query($data) . "\n"
];
$html = getSimpleHTMLDOM($url, $header, $opts) or returnClientError('No results for this query.');
$html = getSimpleHTMLDOM($url, $header, $opts);
$annonces = $html->find('.css_SrhRst');
$limit = $this->getInput('limit') ?? 10;

View File

@@ -5,13 +5,13 @@ class Formula1Bridge extends BridgeAbstract
const NAME = 'Formula1 Bridge';
const URI = 'https://formula1.com/';
const DESCRIPTION = 'Returns latest official Formula 1 news';
const MAINTAINER = 'AxorPL';
const MAINTAINER = 'axor-mst';
const API_KEY = 'qPgPPRJyGCIPxFT3el4MF7thXHyJCzAP';
const API_KEY = 'xZ7AOODSjiQadLsIYWefQrpCSQVDbHGC';
const API_URL = 'https://api.formula1.com/v1/editorial/articles?limit=%u';
const ARTICLE_AUTHOR = 'Formula 1';
const ARTICLE_URL = 'https://formula1.com/en/latest/article.%s.%s.html';
const ARTICLE_URL = 'https://formula1.com/en/latest/article/%s.%s';
const LIMIT_MIN = 1;
const LIMIT_DEFAULT = 10;
@@ -36,7 +36,11 @@ class Formula1Bridge extends BridgeAbstract
$limit = min(self::LIMIT_MAX, max(self::LIMIT_MIN, $limit));
$url = sprintf(self::API_URL, $limit);
$json = json_decode(getContents($url, ['apikey: ' . self::API_KEY]));
$json = json_decode(getContents($url, [
'Accept: application/json',
'apikey: ' . self::API_KEY,
'locale: en'
]));
if (property_exists($json, 'error')) {
returnServerError($json->message);
}

View File

@@ -1,78 +0,0 @@
<?php
/**
 * Bridge for the "Exklusiv" blog posts of fragdenstaat.de.
 *
 * The overview page supplies title, link, image and date of each post; the
 * article page supplies lead, body and authors.
 */
class FragDenStaatBridge extends BridgeAbstract
{
    const MAINTAINER = 'swofl';
    const NAME = 'FragDenStaat';
    const URI = 'https://fragdenstaat.de';
    const CACHE_TIMEOUT = 2 * 60 * 60; // 2h
    const DESCRIPTION = 'Get latest blog posts from FragDenStaat Exklusiv';
    const PARAMETERS = [ [
        'qLimit' => [
            'name' => 'Query Limit',
            'title' => 'Amount of articles to query',
            'type' => 'number',
            'defaultValue' => 5,
        ],
    ] ];

    /**
     * Extract the item fields available on the overview page.
     *
     * @param simple_html_dom_node $teaser one <article> element of the overview page
     * @return array item with title, uri, enclosures, uid and timestamp
     */
    protected function parseTeaser($teaser)
    {
        $result = [];

        $header = $teaser->find('h3 > a', 0);
        $result['title'] = $header->plaintext;
        $result['uri'] = static::URI . $header->href;
        $result['enclosures'] = [];
        $result['enclosures'][] = $teaser->find('img', 0)->src;
        $result['uid'] = hash('sha256', $result['title']);
        $result['timestamp'] = strtotime($teaser->find('time', 0)->getAttribute('datetime'));

        return $result;
    }

    /**
     * Read the overview page, then complete every teaser with the content and
     * authors found on its article page.
     */
    public function collectData()
    {
        $html = getSimpleHTMLDOM(self::URI . '/artikel/exklusiv/');

        // At most 12 teasers are requested; negative input yields none
        $queryLimit = max(0, min((int) $this->getInput('qLimit'), 12));

        // array_slice() stops at the end of the list, so a page that offers fewer
        // <article> elements than requested no longer hands null to parseTeaser()
        $teaserElements = array_slice($html->find('article'), 0, $queryLimit);

        $teasers = [];
        foreach ($teaserElements as $teaserElement) {
            $teasers[] = $this->parseTeaser($teaserElement);
        }

        foreach ($teasers as $article) {
            // Article pages are cached six times longer than the feed itself
            $articleHtml = getSimpleHTMLDOMCached($article['uri'], static::CACHE_TIMEOUT * 6);
            $articleCore = $articleHtml->find('article.blog-article', 0);

            $content = '';
            $lead = $articleCore->find('div.lead > p', 0)->innertext;
            $content .= '<h2>' . $lead . '</h2>';
            foreach ($articleCore->find('div.blog-content > p, div.blog-content > h3') as $paragraph) {
                $content .= $paragraph->outertext;
            }
            $article['content'] = '<img src="' . $article['enclosures'][0] . '"/>' . $content;

            // Authors are joined as a comma separated list
            $article['author'] = '';
            foreach ($articleCore->find('a[rel="author"]') as $author) {
                $article['author'] .= $author->innertext . ', ';
            }
            $article['author'] = rtrim($article['author'], ', ');

            $this->items[] = $article;
        }
    }
}

View File

@@ -34,8 +34,7 @@ class FurAffinityUserBridge extends BridgeAbstract
$url = self::URI . '/gallery/' . $this->getInput('searchUsername');
$html = getSimpleHTMLDOM($url, [], $opt)
or returnServerError('Could not load the user\'s gallery page.');
$html = getSimpleHTMLDOM($url, [], $opt);
$submissions = $html->find('section[id=gallery-gallery]', 0)->find('figure');
foreach ($submissions as $submission) {

View File

@@ -155,8 +155,7 @@ class GiteaBridge extends BridgeAbstract
public function collectData()
{
$html = getSimpleHTMLDOM($this->getURI())
or returnServerError('Could not request ' . $this->getURI());
$html = getSimpleHTMLDOM($this->getURI());
$html = defaultLinkTo($html, $this->getURI());
$this->title = $html->find('[property="og:title"]', 0)->content;
@@ -246,8 +245,7 @@ class GiteaBridge extends BridgeAbstract
];
if ($this->getInput('include_description')) {
$issue_html = getSimpleHTMLDOMCached($uri, 3600)
or returnServerError('Unable to load issue description');
$issue_html = getSimpleHTMLDOMCached($uri, 3600);
$issue_html = defaultLinkTo($issue_html, $uri);
@@ -308,8 +306,7 @@ class GiteaBridge extends BridgeAbstract
];
if ($this->getInput('include_description')) {
$issue_html = getSimpleHTMLDOMCached($uri, 3600)
or returnServerError('Unable to load issue description');
$issue_html = getSimpleHTMLDOMCached($uri, 3600);
$issue_html = defaultLinkTo($issue_html, $uri);

View File

@@ -28,7 +28,7 @@ class GlowficBridge extends BridgeAbstract
public function collectData()
{
$url = $this->getAPIURI();
$metadata = get_headers($url . '/replies', true) or returnClientError('Post did not return reply headers.');
$metadata = get_headers($url . '/replies', true);
$metadata['Last-Page'] = ceil($metadata['Total'] / $metadata['Per-Page']);
if (
!is_null($this->getInput('start_page')) &&

View File

@@ -171,8 +171,7 @@ class GogsBridge extends BridgeAbstract
];
if ($this->getInput('include_description')) {
$issue_html = getSimpleHTMLDOMCached($uri, 3600)
or returnServerError('Unable to load issue description');
$issue_html = getSimpleHTMLDOMCached($uri, 3600);
$issue_html = defaultLinkTo($issue_html, $uri);

View File

@@ -53,7 +53,7 @@ class GolemBridge extends FeedExpander
]
]];
const LIMIT = 5;
const HEADERS = ['Cookie: golem_consent20=simple|220101;'];
const HEADERS = ['Cookie: golem_consent20=simple|250101;'];
public function collectData()
{

View File

@@ -109,7 +109,7 @@ class GoogleScholarBridge extends BridgeAbstract
case 'user':
$userId = $this->getInput('userId');
$uri = self::URI . '/citations?hl=en&view_op=list_works&sortby=pubdate&user=' . $userId;
$html = getSimpleHTMLDOM($uri) or returnServerError('Could not fetch Google Scholar data.');
$html = getSimpleHTMLDOM($uri);
$publications = $html->find('tr[class="gsc_a_tr"]');
@@ -184,7 +184,7 @@ class GoogleScholarBridge extends BridgeAbstract
$uri .= $sortBy ? '&scisbd=1' : '';
$uri .= $numResults ? '&num=' . $numResults : '';
$html = getSimpleHTMLDOM($uri) or returnServerError('Could not fetch Google Scholar data.');
$html = getSimpleHTMLDOM($uri);
$publications = $html->find('div[class="gs_r gs_or gs_scl"]');

View File

@@ -438,8 +438,7 @@ class ItakuBridge extends BridgeAbstract
private function getOwnerID($username)
{
$url = self::URI . "/api/user_profiles/{$username}/?format=json";
$data = $this->getData($url, true, true)
or returnServerError("Could not load $url");
$data = $this->getData($url, true, true);
return $data['owner'];
}
@@ -451,8 +450,7 @@ class ItakuBridge extends BridgeAbstract
}
$uri = self::URI . '/posts/' . $id;
$url = self::URI . '/api/posts/' . $id . '/?format=json';
$data = $metadata ?? $this->getData($url, true, true)
or returnServerError("Could not load $url");
$data = $metadata ?? $this->getData($url, true, true);
$content_str = nl2br($data['content']);
$content = "<p>{$content_str}</p><br/>"; //TODO: Add link and itaku user mention detection and convert into links.
@@ -497,8 +495,7 @@ class ItakuBridge extends BridgeAbstract
$content .= "<a href=\"{$url}\"><b>{$title}</b></a><br/>";
if ($media['is_thumbnail_for_video']) {
$url = self::URI . '/api/galleries/images/' . $media['id'] . '/?format=json';
$media_data = $this->getData($url, true, true)
or returnServerError("Could not load $url");
$media_data = $this->getData($url, true, true);
$content .= "<video controls src=\"{$media_data['video']['video']}\" poster=\"{$media['image_xl']}\"/>";
} else {
$content .= "<a href=\"{$url}\"><img src=\"{$src}\"></a>";
@@ -523,11 +520,11 @@ class ItakuBridge extends BridgeAbstract
$url = self::URI . '/api/commissions/' . $id . '/?format=json';
$uri = self::URI . '/commissions/' . $id;
$data = $metadata ?? $this->getData($url, true, true)
or returnServerError("Could not load $url");
$data = $metadata ?? $this->getData($url, true, true);
$content_str = nl2br($data['description']);
$content = "<p>{$content_str}</p><br>"; //TODO: Add link and itaku user mention detection and convert into links.
$content = "<p>{$content_str}</p><br>";
//TODO: Add link and itaku user mention detection and convert into links.
if (array_key_exists('tags', $data) && count($data['tags']) > 0) {
// $content .= "🏷 Tag(s): ";
@@ -570,8 +567,7 @@ class ItakuBridge extends BridgeAbstract
$content .= "<a href=\"{$uri}\"><b>{$data['thumbnail_detail']['title']}</b></a><br/>";
if ($data['thumbnail_detail']['is_thumbnail_for_video']) {
$url = self::URI . '/api/galleries/images/' . $data['thumbnail_detail']['id'] . '/?format=json';
$media_data = $this->getData($url, true, true)
or returnServerError("Could not load $url");
$media_data = $this->getData($url, true, true);
$content .= "<video controls src=\"{$media_data['video']['video']}\" poster=\"{$data['thumbnail_detail']['image_lg']}\"/>";
} else {
$content .= "<a href=\"{$uri}\"><img src=\"{$data['thumbnail_detail']['image_lg']}\"></a>";
@@ -595,8 +591,7 @@ class ItakuBridge extends BridgeAbstract
{
$uri = self::URI . '/images/' . $id;
$url = self::URI . '/api/galleries/images/' . $id . '/?format=json';
$data = /* $metadata ?? */ $this->getData($url, true, true)
or returnServerError("Could not load $url");
$data = /* $metadata ?? */ $this->getData($url, true, true);
$content_str = nl2br($data['description']);
$content = "<p>{$content_str}</p><br/>"; //TODO: Add link and itaku user mention detection and convert into links.
@@ -640,8 +635,7 @@ class ItakuBridge extends BridgeAbstract
if (array_key_exists('is_thumbnail_for_video', $data)) {
$url = self::URI . '/api/galleries/images/' . $data['id'] . '/?format=json';
$media_data = $this->getData($url, true, true)
or returnServerError("Could not load $url");
$media_data = $this->getData($url, true, true);
$content .= "<video controls src=\"{$media_data['video']['video']}\" poster=\"{$data['image_xl']}\"/>";
} else {
if (array_key_exists('video', $data) && is_null($data['video'])) {

View File

@@ -9,8 +9,7 @@ class JohannesBlickBridge extends BridgeAbstract
public function collectData()
{
$html = getSimpleHTMLDOM(self::URI)
or returnServerError('Could not request: ' . self::URI);
$html = getSimpleHTMLDOM(self::URI);
$html = defaultLinkTo($html, self::URI);
foreach ($html->find('ul[class=easyfolderlisting] > li > a') as $index => $a) {

View File

@@ -181,8 +181,7 @@ class JustETFBridge extends BridgeAbstract
if ($this->getInput('full')) {
$uri = $this->extractNewsUri($article);
$html = getSimpleHTMLDOMCached($uri)
or returnServerError('Failed loading full article from ' . $uri);
$html = getSimpleHTMLDOMCached($uri);
$fullArticle = $html->find('div.article', 0)
or returnServerError('No content found! Layout might have changed!');

View File

@@ -64,10 +64,6 @@ Returns feeds for bug comments';
DEFAULT_SPAN_TEXT
);
if ($html === false) {
returnServerError('Failed to load page!');
}
$html = defaultLinkTo($html, self::URI);
// Store header information into private members

View File

@@ -11,7 +11,7 @@ class LaTeX3ProjectNewslettersBridge extends BridgeAbstract
public function collectData()
{
$html = getSimpleHTMLDOM(static::URI . '/news/latex3-news/') or returnServerError('No contents received!');
$html = getSimpleHTMLDOM(static::URI . '/news/latex3-news/');
$newsContainer = $html->find('article tbody', 0);
foreach ($newsContainer->find('tr') as $row) {

View File

@@ -14,6 +14,37 @@ class LegifranceJOBridge extends BridgeAbstract
private $timestamp;
private $uri;
/**
 * Build the feed items from the heading hierarchy of the page.
 *
 * Every h3 is a section; its h4 subsections and their h5 origins are searched
 * in the element that follows the heading. One item is produced per origin,
 * per subsection without origins, and per section without subsections.
 */
public function collectData()
{
    $html = getSimpleHTMLDOM(self::URI);

    // NOTE: reading the author from h2.titleJO and the uri from h2.titleELI is
    // currently disabled, so this method does not assign $this->uri.
    // The timestamp is only parsed out of the uri when one is available: passing
    // null to strpos()/substr() is deprecated since PHP 8.1 and ended in false
    // anyway, which is the value kept here.
    $this->timestamp = is_string($this->uri)
        ? strtotime(substr($this->uri, strpos($this->uri, 'eli/jo/') + strlen('eli/jo/'), -5))
        : false;

    foreach ($html->find('h3') as $section) {
        $subsections = $section->nextSibling()->find('h4');
        if (empty($subsections)) {
            // Section without subsections: the section itself is the item
            $this->items[] = $this->extractItem($section);
            continue;
        }

        foreach ($subsections as $subsection) {
            $origins = $subsection->nextSibling()->find('h5');
            if (empty($origins)) {
                // Subsection without origins: the subsection is the item
                $this->items[] = $this->extractItem($section, $subsection);
                continue;
            }

            foreach ($origins as $origin) {
                $this->items[] = $this->extractItem($section, $subsection, $origin);
            }
        }
    }
}
private function extractItem($section, $subsection = null, $origin = null)
{
$item = [];
@@ -35,7 +66,9 @@ class LegifranceJOBridge extends BridgeAbstract
$item['content'] = '';
foreach ($data->nextSibling()->find('a') as $content) {
$text = $content->plaintext;
$href = $content->nextSibling()->getAttribute('resource');
$href = '';
//$href = $content->nextSibling()->getAttribute('resource');
$item['content'] .= '<p><a href="' . $href . '">' . $text . '</a></p>';
}
return $item;
@@ -45,33 +78,4 @@ class LegifranceJOBridge extends BridgeAbstract
{
return 'https://www.legifrance.gouv.fr/img/favicon.ico';
}
/**
 * Read author, uri and timestamp from the page header, then build the feed
 * items from the heading hierarchy.
 *
 * Every h3 is a section; its h4 subsections and their h5 origins are searched
 * in the element that follows the heading. One item is produced per origin,
 * per subsection without origins, and per section without subsections.
 */
public function collectData()
{
    // returnServerError() is the error helper used by the bridges; the former
    // $this->returnServer() call named a different, unavailable method
    $html = getSimpleHTMLDOM(self::URI)
        or returnServerError('Unable to download ' . self::URI);

    $this->author = trim($html->find('h2.titleJO', 0)->plaintext);

    // The ELI heading contains the uri after some leading text; keep it from "https" on
    $uri = $html->find('h2.titleELI', 0)->plaintext;
    $this->uri = trim(substr($uri, strpos($uri, 'https')));

    // The date is the uri part after "eli/jo/", without the last five characters
    $this->timestamp = strtotime(substr($this->uri, strpos($this->uri, 'eli/jo/') + strlen('eli/jo/'), -5));

    foreach ($html->find('h3') as $section) {
        $subsections = $section->nextSibling()->find('h4');
        foreach ($subsections as $subsection) {
            $origins = $subsection->nextSibling()->find('h5');
            foreach ($origins as $origin) {
                $this->items[] = $this->extractItem($section, $subsection, $origin);
            }
            if (!empty($origins)) {
                continue;
            }
            // Subsection without origins: the subsection is the item
            $this->items[] = $this->extractItem($section, $subsection);
        }
        if (!empty($subsections)) {
            continue;
        }
        // Section without subsections: the section itself is the item
        $this->items[] = $this->extractItem($section);
    }
}
}

110
bridges/LfcPlBridge.php Normal file
View File

@@ -0,0 +1,110 @@
<?php
/**
 * Bridge for the news archive of lfc.pl.
 *
 * The archive page of the current month lists the articles; every article page
 * is then fetched for its body, image, author, date and (optionally) comments.
 */
class LfcPlBridge extends BridgeAbstract
{
    const NAME = 'LFC (lfc.pl)';
    const DESCRIPTION = 'LFC.pl - największa polska strona o Liverpool FC';
    const URI = 'https://lfc.pl';
    const MAINTAINER = 'brtsos';
    const PARAMETERS = [
        [
            'comments' => [
                'type' => 'list',
                'name' => 'Include comments',
                'title' => 'Include comments in the article content',
                'values' => [
                    'No' => 'no',
                    'Yes' => 'yes',
                ],
            ]
        ]
    ];

    /**
     * Collect up to 10 articles from the archive of the current month.
     * Articles whose text contains "Artykuł sponsorowany" are skipped.
     */
    public function collectData()
    {
        // One date() call, so year and month always come from the same instant
        $dom = getSimpleHTMLDOM(self::URI . '/Archiwum/' . date('Ym'));

        // The list is reversed and the first 10 entries of the reversed list are
        // used (presumably the most recent ones - confirm against the site)
        $list = $dom->find('#page .list-vertical li');
        $list = array_reverse($list);
        $list = array_slice($list, 0, 10);

        foreach ($list as $li) {
            $link = $li->find('a', 0);
            $url = self::URI . $link->href;

            $articleDom = getSimpleHTMLDOM($url);
            $description = $this->getContent($articleDom);
            if (mb_strpos($description, 'Artykuł sponsorowany') !== false) {
                continue;
            }

            // Only prepend an <img> when an image url was actually found,
            // instead of emitting a tag with an empty src
            $content = $description;
            $imageUrl = $this->getImage($articleDom);
            if ($imageUrl !== null) {
                $image = '<img src="' . $imageUrl . '" alt="' . $link->plaintext . '" />';
                $content = $image . '<br />' . $description;
            }

            $tagsToRemove = ['script', 'iframe', 'input', 'form'];
            $content = sanitize($content, $tagsToRemove);

            $footerArticle = $articleDom->find('.footer', 0)->find('.item', 0)->find('div', 1);
            $author = $footerArticle->find('a', 0)->plaintext;
            $dateTime = $footerArticle->find('div', 0)->plaintext;

            $item = [
                'title' => $link->plaintext,
                'uri' => $url,
                'content' => $content,
                'author' => $author,
            ];

            // createFromFormat() returns false for text that does not match the
            // format; in that case the item is kept without a timestamp
            $date = DateTime::createFromFormat('d.m.Y H:i', trim($dateTime));
            if ($date !== false) {
                $item['timestamp'] = $date->getTimestamp();
            }

            $this->items[] = $item;
        }
    }

    /**
     * Return the article body, followed by the comments when they are requested.
     *
     * @param simple_html_dom $article parsed article page
     * @return string HTML of the body and, optionally, of the comments
     */
    private function getContent($article)
    {
        $content = $article->find('.news-body', 0)->innertext;

        $comments = '';
        if ($this->withComment()) {
            $commentsHtml = $article->find('#comments', 0);
            if ($commentsHtml) {
                $commentsDom = $commentsHtml->find('.comment');
                if (count($commentsDom) > 0) {
                    $comments = '<h3>Komentarze:</h3>';
                }
                foreach ($commentsDom as $comment) {
                    $header = $comment->find('.header', 0)->plaintext;
                    // Own variable on purpose: storing this in $content would
                    // replace the article body with the last comment
                    $commentText = $comment->find('.content', 0)->plaintext;
                    $comments .= $header . '<br />' . $commentText . '<br /><br />';
                }
            }
        }

        return $content . '<br /> <br />' . $comments;
    }

    /**
     * Return the absolute url of the article image, read from the inline
     * background-image style of "#news .img".
     *
     * @param simple_html_dom $article parsed article page
     * @return string|null null when the element or the style rule is missing
     */
    private function getImage($article): ?string
    {
        $imgElement = $article->find('#news .img', 0);
        if (!$imgElement) {
            return null;
        }

        if (preg_match('/background-image:\s*url\(([^)]+)\)/i', (string) $imgElement->style, $matches)) {
            // The url may be wrapped in single or double quotes
            return self::URI . trim($matches[1], "'\"");
        }

        return null;
    }

    /**
     * Tell whether the "comments" input is set to "yes".
     */
    private function withComment(): bool
    {
        return $this->getInput('comments') === 'yes';
    }
}

View File

@@ -1,49 +1,80 @@
<?php
class MixologyBridge extends FeedExpander
class MixologyBridge extends BridgeAbstract
{
const MAINTAINER = 'swofl';
const NAME = 'Mixology';
const URI = 'https://mixology.eu';
const CACHE_TIMEOUT = 6 * 60 * 60; // 6h
const DESCRIPTION = 'Get latest blog posts from Mixology';
const PARAMETERS = [ [
'limit' => self::LIMIT,
] ];
public function collectData()
{
$feed_url = self::URI . '/feed';
$limit = $this->getInput('limit') ?? 10;
$this->collectExpandableDatas($feed_url, $limit);
$html = getSimpleHTMLDOM(self::URI);
$teasers = [];
$teaserElements = [];
$teaserElements[] = $html->find('.aufmacher .views-view-responsive-grid__item-inner', 0);
foreach ($html->find('.block-views-blockmixology-frontpage-block-2 .views-col') as $teaser) {
$teaserElements[] = $teaser;
}
foreach ($teaserElements as $teaser) {
$teasers[] = $this->parseTeaser($teaser);
}
foreach ($teasers as $article) {
$this->items[] = $this->parseItem($article);
}
}
/**
 * Extract the item fields available in a front page teaser.
 *
 * @param simple_html_dom_node $teaser teaser element of the front page
 * @return array item with title, uri, enclosures, uid and, when a category
 *               field is present, categories
 */
protected function parseTeaser($teaser)
{
    $titleLink = $teaser->find('.views-field-title a', 0);
    $headline = $titleLink->plaintext;

    // Link and image paths are prefixed with the site uri
    $teaserData = [
        'title' => $headline,
        'uri' => self::URI . $titleLink->href,
        'enclosures' => [self::URI . $teaser->find('img', 0)->src],
        'uid' => hash('sha256', $headline),
    ];

    $categoryField = $teaser->find('.views-field-field-kategorie', 0);
    if ($categoryField) {
        $teaserData['categories'] = [];
        foreach ($categoryField->find('a') as $categoryLink) {
            $teaserData['categories'][] = $categoryLink->innertext;
        }
    }

    return $teaserData;
}
protected function parseItem(array $item)
{
$article = getSimpleHTMLDOMCached($item['uri']);
$content = '';
$headerImage = $article->find('div.edgtf-full-width img.wp-post-image', 0);
if (is_object($headerImage)) {
$item['enclosures'] = [];
$item['enclosures'][] = $headerImage->src;
$content .= '<img src="' . $headerImage->src . '"/>';
$authorLink = $article->find('.beitrag-author a', 0);
if (!empty($authorLink)) {
$item['author'] = $authorLink->plaintext;
}
foreach ($article->find('article .wpb_content_element > .wpb_wrapper') as $element) {
$timeElement = $article->find('.beitrag-date time', 0);
if (!empty($timeElement)) {
$item['timestamp'] = strtotime($timeElement->datetime);
}
$content = '';
$content .= '<img src="' . $item['enclosures'][0] . '"/>';
foreach ($article->find('article .wpb_content_element>.wpb_wrapper, article .field--type-text-with-summary>.wp-block-columns>.wp-block-column') as $element) {
$content .= $element->innertext;
}
$item['content'] = $content;
$item['categories'] = [];
foreach ($article->find('.edgtf-tags > a') as $tag) {
$item['categories'][] = $tag->plaintext;
}
return $item;
}
}

View File

@@ -19,14 +19,28 @@ class MondeDiploBridge extends BridgeAbstract
foreach ($html->find('div.unarticle') as $article) {
$element = $article->parent();
$title = $element->find('h3', 0)->plaintext;
$datesAuteurs = $element->find('div.dates_auteurs', 0)->plaintext;
$titleElement = $element->find('h3', 0);
if (!$titleElement) {
continue;
}
$title = $titleElement->plaintext;
$datesAuteursElement = $element->find('div.dates_auteurs', 0);
$datesAuteurs = is_null($datesAuteursElement) ? '' : $element->find('div.dates_auteurs', 0)->plaintext;
$item = [];
$item['uri'] = urljoin(self::URI, $element->href);
$item['title'] = $this->cleanText($title) . ' - ' . $this->cleanText($datesAuteurs);
$item['title'] = $this->getItemTitle($title, $datesAuteurs);
$item['content'] = $this->cleanText(str_replace([$title, $datesAuteurs], '', $element->plaintext));
$this->items[] = $item;
}
}
/**
 * Build the item title: the cleaned title, followed by " - " and the cleaned
 * dates/authors text when that text is not empty.
 *
 * @param string $title raw title text
 * @param string $datesAuteurs raw dates/authors text, possibly empty
 * @return string
 */
private function getItemTitle($title, $datesAuteurs)
{
    $cleanTitle = $this->cleanText($title);

    // Without dates/authors the title stands alone, with no trailing separator
    if (strlen($datesAuteurs) === 0) {
        return $cleanTitle;
    }

    return $cleanTitle . ' - ' . $this->cleanText($datesAuteurs);
}
}

View File

@@ -64,10 +64,6 @@ Returns feeds for bug comments';
DEFAULT_SPAN_TEXT
);
if ($html === false) {
returnServerError('Failed to load page!');
}
// Fix relative URLs
defaultLinkTo($html, self::URI);

View File

@@ -25,8 +25,7 @@ class OMonlineBridge extends BridgeAbstract
$url = sprintf('%s', self::URI);
}
$html = getSimpleHTMLDOM($url)
or returnServerError('Could not request: ' . $url);
$html = getSimpleHTMLDOM($url);
$html = defaultLinkTo($html, $url);
@@ -35,8 +34,7 @@ class OMonlineBridge extends BridgeAbstract
$articlePath = $a->href;
$articlePageHtml = getSimpleHTMLDOMCached($articlePath, self::CACHE_TIMEOUT)
or returnServerError('Could not request: ' . $articlePath);
$articlePageHtml = getSimpleHTMLDOMCached($articlePath, self::CACHE_TIMEOUT);
$articlePageHtml = defaultLinkTo($articlePageHtml, self::URI);

61
bridges/OllamaBridge.php Normal file
View File

@@ -0,0 +1,61 @@
<?php
/**
 * Bridge for the blog of ollama.com.
 *
 * The blog index supplies title, date and link of each post; the post page
 * supplies the content.
 */
class OllamaBridge extends BridgeAbstract
{
    const MAINTAINER = 'sqrtminusone';
    const NAME = 'Ollama Blog Bridge';
    const URI = 'https://ollama.com';
    const CACHE_TIMEOUT = 3600; // 1 hour
    const DESCRIPTION = 'Returns latest blog posts from Ollama';
    const PARAMETERS = [
        '' => [
            'limit' => [
                'name' => 'Limit',
                'type' => 'number',
                'required' => true,
                'defaultValue' => 10
            ],
        ]
    ];

    /**
     * Turn the first "limit" posts of the blog index into feed items.
     */
    public function collectData()
    {
        $html = getSimpleHTMLDOM(self::URI . '/blog/');
        $limit = $this->getInput('limit');
        $posts = $html->find('main > section > a.group');

        // Never read more posts than the index offers
        $wanted = min(count($posts), $limit);

        foreach ($posts as $position => $post) {
            if ($position >= $wanted) {
                break;
            }

            $title = $post->find('h2', 0)->plaintext;

            // Only the first 19 characters of the datetime attribute are parsed
            $rawDate = $post->find('h3[datetime]', 0)->getAttribute('datetime');
            $published = new DateTime(mb_substr($rawDate, 0, 19));
            $timestamp = $published->format('U');

            $uri = self::URI . $post->getAttribute('href');

            $this->items[] = [
                'uri' => $uri,
                'title' => $title,
                'timestamp' => $timestamp,
                'content' => $this->parsePage($uri),
                'uid' => $uri
            ];
        }
    }

    /**
     * Fetch a post page (cached for 86400 seconds) and return the inner HTML of
     * its article section, with links resolved against the site uri.
     *
     * @param string $uri absolute url of the post
     * @return string
     */
    private function parsePage($uri)
    {
        $cacheDuration = 86400;
        $stripNewlines = false; // Do not strip \n from <code> blocks

        $page = getSimpleHTMLDOMCached(
            $uri,
            $cacheDuration,
            [],
            [],
            true,
            true,
            DEFAULT_TARGET_CHARSET,
            $stripNewlines
        );

        $article = $page->find('main > article > section.prose', 0);
        $article = defaultLinkTo($article, self::URI);

        return $article->innertext;
    }
}

View File

@@ -234,11 +234,14 @@ class RedditBridge extends BridgeAbstract
} elseif ($data->is_video) {
// Video
// Higher index -> Higher resolution
end($data->preview->images[0]->resolutions);
$index = key($data->preview->images[0]->resolutions);
$item['content'] = $this->createFigureLink($data->url, $data->preview->images[0]->resolutions[$index]->url, 'Video');
if ($data->media->reddit_video) {
$item['content'] = $this->createVideoContent($data->media->reddit_video);
} else {
// Higher index -> Higher resolution
end($data->preview->images[0]->resolutions);
$index = key($data->preview->images[0]->resolutions);
$item['content'] = $this->createFigureLink($data->url, $data->preview->images[0]->resolutions[$index]->url, 'Video');
}
} elseif (isset($data->media) && $data->media->type == 'youtube.com') {
// Youtube link
$item['content'] = $this->createFigureLink($data->url, $data->media->oembed->thumbnail_url, 'YouTube');
@@ -318,6 +321,16 @@ class RedditBridge extends BridgeAbstract
return sprintf('<a href="%s">%s</a>', $href, $text);
}
/**
 * Render an HTML5 <video> element for a video object.
 *
 * The width, height and fallback_url properties of $video are interpolated
 * into the markup as they are, without escaping.
 *
 * @param \stdClass $video object providing width, height and fallback_url
 * @return string HTML of the <video> element with a single mp4 source
 */
private function createVideoContent(\stdClass $video): string
{
// The text between the <video> tags is the fallback shown by clients that
// cannot render the element
return <<<HTML
<video width="$video->width" height="$video->height" controls>
<source src="$video->fallback_url" type="video/mp4">
Your browser does not support the video tag.
</video>
HTML;
}
public function detectParameters($url)
{
try {

View File

@@ -60,15 +60,10 @@ class RumbleBridge extends BridgeAbstract
$dom = getSimpleHTMLDOM($url);
foreach ($dom->find('ol.thumbnail__grid div.thumbnail__grid--item') as $video) {
$itemUrlString = self::URI . $video->find('a', 0)->href;
$itemUrl = Url::fromString($itemUrlString);
$href = $video->find('a', 0)->href;
$item = [
'title' => $video->find('h3', 0)->plaintext,
// Remove tracking parameter in query string
'uri' => $itemUrl->withQueryString(null)->__toString(),
'author' => $account . '@rumble.com',
'content' => defaultLinkTo($video, self::URI)->innertext,
];
@@ -78,6 +73,12 @@ class RumbleBridge extends BridgeAbstract
$publishedAt = new \DateTimeImmutable($time->getAttribute('datetime'));
$item['timestamp'] = $publishedAt->getTimestamp();
}
$href = ltrim($href, '/');
$itemUrl = Url::fromString(self::URI . $href);
// Remove tracking parameter in query string
$item['uri'] = $itemUrl->withQueryString(null)->__toString();
$this->items[] = $item;
}
}

View File

@@ -132,7 +132,7 @@ class RutubeBridge extends BridgeAbstract
$video->description . ' '
)
);
$item['timestamp'] = $video->created_ts;
$item['timestamp'] = $video->publication_ts;
$item['author'] = $video->author->name;
$item['content'] = $content;

View File

@@ -49,8 +49,7 @@ class SchweinfurtBuergerinformationenBridge extends BridgeAbstract
private function getArticleIDsFromPage($page)
{
$url = sprintf(self::URI . '?art_pager=%d', $page);
$html = getSimpleHTMLDOMCached($url, self::INDEX_CACHE_TIMEOUT)
or returnServerError('Could not retrieve ' . $url);
$html = getSimpleHTMLDOMCached($url, self::INDEX_CACHE_TIMEOUT);
$articles = $html->find('div.artikel-uebersicht');
$articleIDs = [];
@@ -70,8 +69,7 @@ class SchweinfurtBuergerinformationenBridge extends BridgeAbstract
private function generateItemFromArticle($id)
{
$url = sprintf(self::ARTICLE_URI, $id);
$html = getSimpleHTMLDOMCached($url, self::ARTICLE_CACHE_TIMEOUT)
or returnServerError('Could not retrieve ' . $url);
$html = getSimpleHTMLDOMCached($url, self::ARTICLE_CACHE_TIMEOUT);
$div = $html->find('div#artikel-detail', 0);
$divContent = $div->find('.c-content', 0);

View File

@@ -633,8 +633,7 @@ class SkimfeedBridge extends BridgeAbstract
$author = '<a href="' . $anchor->href . '">' . trim($anchor->plaintext) . '</a>';
$uri = $anchor->href;
$box_html = getSimpleHTMLDOM($uri)
or returnServerError('Could not load custom feed!');
$box_html = getSimpleHTMLDOM($uri);
$this->extractFeed($box_html, $author);
}
@@ -665,8 +664,7 @@ class SkimfeedBridge extends BridgeAbstract
*/
private function exportBoxChannels()
{
$html = getSimpleHTMLDOMCached(static::URI)
or returnServerError('No contents received from Skimfeed!');
$html = getSimpleHTMLDOMCached(static::URI);
if (!$this->isCompatible($html)) {
returnServerError('Skimfeed version is not compatible!');
@@ -722,8 +720,7 @@ EOD;
*/
private function exportTechChannels()
{
$html = getSimpleHTMLDOMCached(static::URI)
or returnServerError('No contents received from Skimfeed!');
$html = getSimpleHTMLDOMCached(static::URI);
if (!$this->isCompatible($html)) {
returnServerError('Skimfeed version is not compatible!');
@@ -759,8 +756,7 @@ EOD;
$message .= "\t\t'{$title}' => array(\n";
$channel_html = getSimpleHTMLDOMCached(static::URI . $uri)
or returnServerError('Could not load tech channel ' . $channel->plaintext . '!');
$channel_html = getSimpleHTMLDOMCached(static::URI . $uri);
$boxes = $channel_html->find('#boxx .boxes')
or returnServerError('Could not find boxes!');

View File

@@ -30,8 +30,7 @@ class StanfordSIRbookreviewBridge extends BridgeAbstract
break;
}
$html = getSimpleHTMLDOM($url)
or returnServerError('Failed loading content!');
$html = getSimpleHTMLDOM($url);
foreach ($html->find('article') as $element) {
$item = [];
$item['title'] = $element->find('div > h4 > a', 0)->plaintext;

View File

@@ -65,7 +65,7 @@ class StockFilingsBridge extends FeedExpander
{
$uri = $this->getSearchUrl();
return getSimpleHTMLDOM($uri) ?: returnServerError('Could not request SEC.');
return getSimpleHTMLDOM($uri);
}
/**

View File

@@ -25,9 +25,6 @@ class StorytelBridge extends BridgeAbstract
}
$html = getSimpleHTMLDOM($url);
if (!$html) {
returnServerError('Unable to fetch Storytel list');
}
foreach ($html->find('li.sc-4615116a-1') as $element) {
$item = [];

View File

@@ -36,7 +36,7 @@ class TapasBridge extends FeedExpander
$this->id = $this->getInput('title');
}
if ($this->getInput('force_title') || !$this->id) {
$html = getSimpleHTMLDOM($this->getURI()) or returnServerError('Could not request ' . $this->getURI());
$html = getSimpleHTMLDOM($this->getURI());
$this->id = $html->find('meta[property$=":url"]', 0)->content;
$this->id = str_ireplace(['tapastic://series/', '/info'], '', $this->id);
}

View File

@@ -15,6 +15,14 @@ class TelegramBridge extends BridgeAbstract
]
]
];
const CONFIGURATION = [
'max_pages' => [
'required' => false,
'defaultValue' => 1,
],
];
const TEST_DETECT_PARAMETERS = [
'https://t.me/s/rssbridge' => ['username' => 'rssbridge'],
'https://t.me/rssbridge' => ['username' => 'rssbridge'],
@@ -26,7 +34,7 @@ class TelegramBridge extends BridgeAbstract
'https://rssbridge.t.me/' => ['username' => 'rssbridge'],
];
const CACHE_TIMEOUT = 60 * 15; // 15 mins
const CACHE_TIMEOUT = 60 * 60; // 1h
private $feedName = '';
private $enclosures = [];
@@ -36,33 +44,56 @@ class TelegramBridge extends BridgeAbstract
public function collectData()
{
$html = getSimpleHTMLDOM($this->getURI());
$pages = 0;
$url = 'https://t.me/s/' . $this->normalizeUsername();
$channelTitle = $html->find('div.tgme_channel_info_header_title span', 0)->plaintext ?? '';
$channelTitle = htmlspecialchars_decode($channelTitle, ENT_QUOTES);
$this->feedName = $channelTitle . ' (@' . $this->normalizeUsername() . ')';
$posts = $html->find('div.tgme_widget_message_wrap.js-widget_message_wrap');
if (!$channelTitle && !$posts) {
throw new \Exception('Unable to find channel. The channel is non-existing or non-public.');
}
foreach ($posts as $messageDiv) {
$this->itemTitle = '';
$this->enclosures = [];
$item = [];
$max_pages = $this->getOption('max_pages');
$item['uri'] = $messageDiv->find('a.tgme_widget_message_date', 0)->href;
$item['content'] = $this->processContent($messageDiv);
$item['title'] = $this->itemTitle;
$item['timestamp'] = $messageDiv->find('span.tgme_widget_message_meta', 0)->find('time', 0)->datetime;
$item['enclosures'] = $this->enclosures;
// Hard-coded upper bound of 100 loops
while ($pages < $max_pages && $pages < 100) {
$pages++;
$messageOwner = $messageDiv->find('a.tgme_widget_message_owner_name', 0);
if ($messageOwner) {
$item['author'] = html_entity_decode(trim($messageOwner->plaintext), ENT_QUOTES);
$dom = getSimpleHTMLDOM($url);
$channelTitle = $dom->find('div.tgme_channel_info_header_title span', 0)->plaintext ?? '';
$channelTitle = htmlspecialchars_decode($channelTitle, ENT_QUOTES);
$this->feedName = $channelTitle . ' (@' . $this->normalizeUsername() . ')';
$messages = $dom->find('div.tgme_widget_message_wrap.js-widget_message_wrap');
if (!$channelTitle && !$messages) {
throw new \Exception('Unable to find channel. The channel is non-existing or non-public.');
}
$this->items[] = $item;
foreach (array_reverse($messages) as $message) {
$this->itemTitle = '';
$this->enclosures = [];
$item = [];
$item['uri'] = $message->find('a.tgme_widget_message_date', 0)->href;
$item['content'] = $this->processContent($message);
$item['title'] = $this->itemTitle;
$item['timestamp'] = $message->find('span.tgme_widget_message_meta', 0)->find('time', 0)->datetime;
$item['enclosures'] = $this->enclosures;
$messageOwner = $message->find('a.tgme_widget_message_owner_name', 0);
if ($messageOwner) {
$item['author'] = html_entity_decode(trim($messageOwner->plaintext), ENT_QUOTES);
}
array_unshift($this->items, $item);
}
$more = $dom->find('> div.tgme_widget_message_centered.js-messages_more_wrap a', 0);
if ($more && str_contains($more->href, 'before')) {
$url = 'https://t.me/' . $more->href;
} else {
break;
}
}
$this->logger->info(sprintf('Fetched %s messages from %s pages (%s)', count($this->items), $pages, $url));
$this->items = array_reverse($this->items);
}
@@ -369,12 +400,7 @@ EOD;
private function normalizeUsername()
{
// todo: can be replaced with ltrim($username, '@');
$username = $this->getInput('username');
if (substr($username, 0, 1) === '@') {
return substr($username, 1);
}
return $username;
return ltrim($this->getInput('username'), '@');
}
public function detectParameters($url)

View File

@@ -56,8 +56,7 @@ class TestFaktaBridge extends BridgeAbstract
public function collectData()
{
$NEWSURL = self::URI . '/sv';
$html = getSimpleHTMLDOMCached($NEWSURL, 18000) or
returnServerError('Could not request: ' . $NEWSURL);
$html = getSimpleHTMLDOMCached($NEWSURL, 18000);
foreach ($html->find('.row-container') as $element) {
// Debug::log($element);
@@ -68,8 +67,7 @@ class TestFaktaBridge extends BridgeAbstract
$figure = $element->find('img', 0);
$preamble = trim($element->find('.text', 0)->plaintext);
$article_html = getSimpleHTMLDOMCached($url, 18000) or
returnServerError('Could not request: ' . $url);
$article_html = getSimpleHTMLDOMCached($url, 18000);
$article_content = $article_html->find('div.content', 0);
$article_text = $article_html->find('article', 0);

View File

@@ -56,7 +56,8 @@ class TldrTechBridge extends BridgeAbstract
if ($child->tag != 'a') {
continue;
}
$this->extractItem(Url::fromString(self::URI . $child->href));
$itemUrl = Url::fromString(self::URI . ltrim($child->href, '/'));
$this->extractItem($itemUrl);
if (count($this->items) >= $limit) {
break;
}

View File

@@ -10,8 +10,7 @@ class UsesTechbridge extends BridgeAbstract
public function collectData()
{
$html = getSimpleHTMLDOM(self::URI)
or returnServerError('Could not request: ' . self::URI);
$html = getSimpleHTMLDOM(self::URI);
foreach ($html->find('div[class=PersonInner]') as $index => $a) {
$item = []; // Create an empty item

View File

@@ -2,7 +2,7 @@
class VkBridge extends BridgeAbstract
{
const MAINTAINER = 'em92';
// const MAINTAINER = 'em92';
// const MAINTAINER = 'pmaziere';
// const MAINTAINER = 'ahiles3005';
const NAME = 'VK.com';

View File

@@ -16,8 +16,7 @@ class VproTegenlichtBridge extends BridgeAbstract
public function collectData()
{
$url = sprintf('https://www.vpro.nl/programmas/tegenlicht/lees/artikelen.html');
$dom = getSimpleHTMLDOM($url)
or returnServerError('No contents received!');
$dom = getSimpleHTMLDOM($url);
$dom = $dom->find('ul#browsable-news-overview', 0);
$dom = defaultLinkTo($dom, $this->getURI());
foreach ($dom->find('li') as $article) {

View File

@@ -105,10 +105,6 @@ class WikipediaBridge extends BridgeAbstract
// This will automatically send us to the correct main page in any language (try it!)
$html = getSimpleHTMLDOM($this->getURI() . '/wiki');
if (!$html) {
returnServerError('Could not load site: ' . $this->getURI() . '!');
}
/*
* Now read content depending on the language (make sure to create one function per language!)
* We build the function name automatically, just make sure you create a private function ending

View File

@@ -1,149 +0,0 @@
<?php
/**
 * Bridge that lists WorldCosplay photos for a character, cosplayer, series or tag.
 *
 * Every context takes one numeric ID. The ID and the clamped photo limit are
 * substituted into the API path template that belongs to the context, the JSON
 * response is decoded and each listed photo becomes one feed item.
 */
class WorldCosplayBridge extends BridgeAbstract
{
    const NAME = 'WorldCosplay Bridge';
    const URI = 'https://worldcosplay.net/';
    const DESCRIPTION = 'Returns WorldCosplay photos';
    const MAINTAINER = 'AxorPL';

    // API path templates relative to URI: first placeholder is the ID, second the limit
    const API_CHARACTER = 'api/photo/list.json?character_id=%u&limit=%u';
    const API_COSPLAYER = 'api/member/photos.json?member_id=%u&limit=%u';
    const API_SERIES = 'api/photo/list.json?title_id=%u&limit=%u';
    const API_TAG = 'api/tag/photo_list.json?id=%u&limit=%u';

    // Item content template: link href, image src, image alt, image title
    const CONTENT_HTML
        = '<a href="%s" target="_blank"><img src="%s" alt="%s" title="%s"></a>';

    const ERR_CONTEXT = 'No context provided';
    const ERR_QUERY = 'Unable to query: %s';

    // Bounds applied to the user supplied photo limit
    const LIMIT_MIN = 1;
    const LIMIT_MAX = 24;

    const PARAMETERS = [
        'Character' => [
            'cid' => [
                'name' => 'Character ID',
                'type' => 'number',
                'required' => true,
                'title' => 'WorldCosplay character ID',
                'exampleValue' => 18204
            ]
        ],
        'Cosplayer' => [
            'uid' => [
                'name' => 'Cosplayer ID',
                'type' => 'number',
                'required' => true,
                'title' => 'Cosplayer\'s WorldCosplay profile ID',
                'exampleValue' => 406782
            ]
        ],
        'Series' => [
            'sid' => [
                'name' => 'Series ID',
                'type' => 'number',
                'required' => true,
                'title' => 'WorldCosplay series ID',
                'exampleValue' => 3139
            ]
        ],
        'Tag' => [
            'tid' => [
                'name' => 'Tag ID',
                'type' => 'number',
                'required' => true,
                'title' => 'WorldCosplay tag ID',
                'exampleValue' => 33643
            ]
        ],
        'global' => [
            'limit' => [
                'name' => 'Limit',
                'type' => 'number',
                'required' => false,
                'title' => 'Maximum number of photos to return',
                'exampleValue' => 5,
                'defaultValue' => 5
            ]
        ]
    ];

    /**
     * Queries the API endpoint of the current context and appends one item per photo.
     *
     * Fails with a client error when no known context was queried, and with a
     * server error when the response is not a JSON object or reports an error.
     */
    public function collectData()
    {
        $limit = $this->getInput('limit');
        $limit = min(self::LIMIT_MAX, max(self::LIMIT_MIN, $limit));

        $context = $this->resolveContext();
        if ($context === null) {
            returnClientError(self::ERR_CONTEXT);
        }
        [$inputName, $pathTemplate] = $context;

        $url = self::URI . sprintf($pathTemplate, $this->getInput($inputName), $limit);

        $json = json_decode(getContents($url));
        // json_decode() yields null (or a scalar) for malformed/unexpected bodies;
        // report that explicitly instead of dereferencing a non-object below.
        if (!is_object($json)) {
            returnServerError(sprintf(self::ERR_QUERY, $url));
        }
        if (!empty($json->has_error)) {
            returnServerError($json->message ?? sprintf(self::ERR_QUERY, $url));
        }

        foreach ($json->list as $img) {
            // Some endpoints wrap the photo in a "photo" property, others list it directly
            $image = $img->photo ?? $img;

            $item = [
                // URI already ends with a slash, so strip the leading one(s) of the photo path
                'uri' => self::URI . ltrim($image->url, '/'),
                'title' => $image->subject,
                'author' => $img->member->global_name,
                'enclosures' => [$image->large_url],
                'uid' => $image->id,
            ];

            // Context cosplayer don't have created_at
            if (isset($image->created_at)) {
                $item['timestamp'] = $image->created_at;
            }

            // All four values end up inside HTML attributes, so escape them
            $item['content'] = sprintf(
                self::CONTENT_HTML,
                $this->escapeAttribute($item['uri']),
                $this->escapeAttribute($item['enclosures'][0]),
                $this->escapeAttribute($item['title']),
                $this->escapeAttribute($item['title'])
            );

            $this->items[] = $item;
        }
    }

    /**
     * Returns "<context> <id> - WorldCosplay Bridge" for a known context,
     * otherwise the parent's name.
     */
    public function getName()
    {
        $context = $this->resolveContext();
        if ($context === null) {
            return parent::getName();
        }

        $id = $this->getInput($context[0]);
        return sprintf('%s %u - ', $this->queriedContext, $id) . self::NAME;
    }

    /**
     * Maps the queried context to the name of its ID input and its API path template.
     *
     * @return array|null Pair [input name, path template], or null for an unknown context.
     */
    private function resolveContext()
    {
        switch ($this->queriedContext) {
            case 'Character':
                return ['cid', self::API_CHARACTER];
            case 'Cosplayer':
                return ['uid', self::API_COSPLAYER];
            case 'Series':
                return ['sid', self::API_SERIES];
            case 'Tag':
                return ['tid', self::API_TAG];
            default:
                return null;
        }
    }

    /**
     * Escapes a value for use inside a double-quoted HTML attribute.
     *
     * @param mixed $value Value taken from the API response; cast to string first.
     * @return string The escaped text.
     */
    private function escapeAttribute($value): string
    {
        return htmlspecialchars((string) $value, ENT_QUOTES, 'UTF-8');
    }
}

View File

@@ -304,11 +304,9 @@ class XenForoBridge extends BridgeAbstract
// We can optimize performance by caching all but the last page
if ($page != $lastpage) {
$html = getSimpleHTMLDOMCached($pageurl)
or returnServerError('Error loading contents from ' . $pageurl . '!');
$html = getSimpleHTMLDOMCached($pageurl);
} else {
$html = getSimpleHTMLDOM($pageurl)
or returnServerError('Error loading contents from ' . $pageurl . '!');
$html = getSimpleHTMLDOM($pageurl);
}
$html = defaultLinkTo($html, $hosturl);
@@ -347,11 +345,9 @@ class XenForoBridge extends BridgeAbstract
// We can optimize performance by caching all but the last page
if ($page != $lastpage) {
$html = getSimpleHTMLDOMCached($pageurl)
or returnServerError('Error loading contents from ' . $pageurl . '!');
$html = getSimpleHTMLDOMCached($pageurl);
} else {
$html = getSimpleHTMLDOM($pageurl)
or returnServerError('Error loading contents from ' . $pageurl . '!');
$html = getSimpleHTMLDOM($pageurl);
}
$html = defaultLinkTo($html, $hosturl);

View File

@@ -1,12 +1,5 @@
<?php
/**
* RssBridgeYoutube
* Returns the newest videos
* WARNING: to parse big playlists (over ~90 videos), you need to edit simple_html_dom.php:
* change: define('MAX_FILE_SIZE', 600000);
* into: define('MAX_FILE_SIZE', 900000); (or more)
*/
class YoutubeBridge extends BridgeAbstract
{
const NAME = 'YouTube Bridge';

View File

@@ -21,20 +21,15 @@
;enabled_bridges[] = ThePirateBay
;enabled_bridges[] = TikTokBridge
;enabled_bridges[] = Twitch
;enabled_bridges[] = Vk
;enabled_bridges[] = XPathBridge
;enabled_bridges[] = Youtube
;enabled_bridges[] = YouTubeCommunityTabBridge
enabled_bridges[] = *
; Defines the timezone used by RSS-Bridge
; Find a list of supported timezones at
; https://www.php.net/manual/en/timezones.php
; timezone = "UTC" (default)
timezone = "UTC"
; Display a system message to users.
message = ""
;message = "Hello world"
; Whether to enable debug mode.
enable_debug_mode = false
@@ -46,14 +41,18 @@ enable_debug_mode = false
; Whether to enable maintenance mode. If enabled, feed requests receive 503 Service Unavailable
enable_maintenance_mode = false
; Max file size for simple_html_dom in bytes (10000000 => 10 MB)
max_file_size = 10000000
[http]
; Operation timeout in seconds
timeout = 15
timeout = 5
; Operation retry count in case of curl error
retries = 2
retries = 1
; User agent
; Curl user agent
useragent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0"
; Max http response size in MB
@@ -70,6 +69,7 @@ type = "file"
custom_timeout = false
[admin]
; Advertise an email address where people can reach the administrator.
; This address is displayed on the main page, visible to everyone!
; "" = Disabled (default)
@@ -86,6 +86,7 @@ telegram = ""
donations = true
[proxy]
; The HTTP proxy to tunnel requests through
; https://curl.se/libcurl/c/CURLOPT_PROXY.html
; "" = Proxy disabled (default)
@@ -135,6 +136,7 @@ report_limit = 1
; --- Cache specific configuration ---------------------------------------------
[FileCache]
; The root folder to store files in.
; "" = Use the cache folder in the repository (default)
path = ""
@@ -142,6 +144,7 @@ path = ""
enable_purge = true
[SQLiteCache]
; Filepath of the sqlite db file
file = "cache.sqlite"
; Whether to actually delete data when purging
@@ -150,11 +153,17 @@ enable_purge = true
timeout = 5000
[MemcachedCache]
host = "localhost"
port = 11211
; --- Bridge specific configuration ------
[TelegramBridge]
; Max pages to fetch (1 page => 20 messages), min=1 max=100
max_pages = 1
[DiscogsBridge]
; Sets the personal access token for interactions with Discogs. When

View File

@@ -3,7 +3,7 @@
| Country | Address | Status | Contact | Comment |
|:-------:|---------|--------|----------|---------|
| ![](https://iplookup.flagfox.net/images/h16/GB.png) | https://rss-bridge.org/bridge01 | ![](https://img.shields.io/website/https/rss-bridge.org/bridge01.svg) | [@dvikan](https://github.com/dvikan) | London, Digital Ocean|
| ![](https://iplookup.flagfox.net/images/h16/FR.png) | https://rssbridge.flossboxin.org.in | ![](https://img.shields.io/badge/website-up-brightgreen) | [@vdbhb59](https://github.com/vdbhb59) | Hosted with OVH SAS (Maintained in India) |
| ![](https://iplookup.flagfox.net/images/h16/FR.png) | https://rssbridge.flossboxin.org.in | ![](https://img.shields.io/badge/website-up-brightgreen) | [@vdbhb59](https://github.com/vdbhb59) | Hosted with Netcup Germany (Maintained in India) |
| ![](https://iplookup.flagfox.net/images/h16/FR.png) | https://rss-bridge.cheredeprince.net | ![](https://img.shields.io/website/https/rss-bridge.cheredeprince.net) | [@La_Bécasse](https://cheredeprince.net/contact) | Self-Hosted at home in France |
| ![](https://iplookup.flagfox.net/images/h16/FR.png) | https://rss-bridge.sans-nuage.fr | ![](https://img.shields.io/website/https/rss-bridge.sans-nuage.fr) | [@Alsace Réseau Neutre](https://arn-fai.net/contact) | Hosted in Alsace, France |
| ![](https://iplookup.flagfox.net/images/h16/GB.png) | https://rss-bridge.lewd.tech | ![](https://img.shields.io/website/https/rss-bridge.lewd.tech.svg) | [@Erisa](https://github.com/Erisa) | Hosted in London, protected by Cloudflare Rate Limiting |

View File

@@ -0,0 +1,12 @@
# TelegramBridge
By default, the bridge fetches a single page, which contains up to 20 messages.
To fetch more messages, raise the `max_pages` config value (each page costs one HTTP request; at most 100 pages are fetched):
```ini
[TelegramBridge]
; Fetch a maximum of 3 pages (requires 3 http requests)
max_pages = 3
```

View File

@@ -80,14 +80,8 @@ class MrssFormat extends FormatAbstract
$feedUrl = get_current_url();
$linkSelf->setAttribute('href', $feedUrl);
} elseif ($feedKey === 'icon') {
$allowedIconExtensions = [
'.gif',
'.jpg',
'.png',
'.ico',
];
$icon = $feedValue;
if ($icon && in_array(substr($icon, -4), $allowedIconExtensions)) {
if ($icon) {
$feedImage = $document->createElement('image');
$channel->appendChild($feedImage);
$iconUrl = $document->createElement('url');

View File

@@ -44,6 +44,10 @@ final class BridgeCard
data-short-name="$shortName"
>
<a style="position: absolute; top: 10px; left: 10px" href="#bridge-{$bridgeClassName}">
<h1>#</h1>
</a>
<h2><a href="{$uri}">{$name}</a></h2>
<p class="description">{$description}</p>

View File

@@ -7,7 +7,7 @@
*/
final class Configuration
{
private const VERSION = '2024-02-02';
private const VERSION = '2025-01-26';
private static $config = [];

View File

@@ -174,7 +174,7 @@ final class FeedParser
}
foreach ($namespaces as $namespaceName => $namespaceUrl) {
if (in_array($namespaceName, ['', 'content', 'media'])) {
if (in_array($namespaceName, ['', 'content'])) {
continue;
}
$item[$namespaceName] = $this->parseModule($feedItem, $namespaceName, $namespaceUrl);
@@ -250,11 +250,17 @@ final class FeedParser
private function parseModule(\SimpleXMLElement $element, string $namespaceName, string $namespaceUrl): array
{
// Unfortunately this parses out only node values as string
// TODO: parse attributes too
$result = [];
$module = $element->children($namespaceUrl);
foreach ($module as $name => $value) {
// todo: add custom parsing if it's something other than a string
$result[$name] = (string) $value;
if (get_class($value) === 'SimpleXMLElement' && $value->count() !== 0) {
$result[$name] = $this->parseModule($value, $namespaceName, $namespaceUrl);
} else {
$result[$name] = (string) $value;
}
}
return $result;
}

View File

@@ -7,10 +7,6 @@ if (is_file(__DIR__ . '/../vendor/autoload.php')) {
const PATH_LIB_CACHES = __DIR__ . '/../caches/';
const PATH_CACHE = __DIR__ . '/../cache/';
// Allow larger files for simple_html_dom
// todo: extract to config (if possible)
const MAX_FILE_SIZE = 10000000;
// Files
$files = [
__DIR__ . '/../lib/html.php',

View File

@@ -220,7 +220,7 @@ final class Request
return $clone;
}
public function attribute(string $key, $default = null)
public function getAttribute(string $key, $default = null)
{
return $this->attributes[$key] ?? $default;
}

View File

@@ -114,8 +114,9 @@ function str_get_html(
if (empty($str)) {
throw new \Exception('Refusing to parse empty string input');
}
if (strlen($str) > MAX_FILE_SIZE) {
throw new \Exception('Refusing to parse too big input');
if (strlen($str) > Configuration::getConfig('system', 'max_file_size')) {
throw new \Exception('simple_html_dom: Refusing to parse too big input: ' . strlen($str));
}
return $dom->load($str, $lowercase, $stripRN);

View File

@@ -111,6 +111,9 @@ final class Url
if (!str_starts_with($path, '/')) {
throw new UrlException(sprintf('Path must start with forward slash: %s', $path));
}
if (str_starts_with($path, '//')) {
throw new UrlException(sprintf('Illegal path (too many forward slashes): %s', $path));
}
$clone = clone $this;
$clone->path = $path;
return $clone;

View File

@@ -13,7 +13,7 @@ class CacheMiddleware implements Middleware
public function __invoke(Request $request, $next): Response
{
$action = $request->attribute('action');
$action = $request->getAttribute('action');
if ($action !== 'DisplayAction') {
// We only cache DisplayAction (for now)
@@ -43,9 +43,14 @@ class CacheMiddleware implements Middleware
/** @var Response $response */
$response = $next($request);
if (in_array($response->getCode(), [403, 429, 500, 503])) {
if ($response->getCode() === 200) {
// Do nothing because DisplayAction has already cached this on $cacheKey
} elseif (in_array($response->getCode(), [400, 403, 404, 429, 500, 503])) {
// Cache these responses for about ~10 mins on average
$this->cache->set($cacheKey, $response, 60 * 5 + rand(1, 60 * 10));
} else {
// Should never happen
$this->cache->set($cacheKey, $response, 60 * 5);
}
// For 1% of requests, prune cache

View File

@@ -10,20 +10,24 @@ class TokenAuthenticationMiddleware implements Middleware
return $next($request);
}
// Always add token to request attribute
$request = $request->withAttribute('token', $request->get('token'));
$token = $request->get('token');
if (! $request->attribute('token')) {
if (! $token) {
return new Response(render(__DIR__ . '/../templates/token.html.php', [
'message' => 'Missing token',
'message' => 'Missing token',
'token' => '',
]), 401);
}
if (! hash_equals(Configuration::getConfig('authentication', 'token'), $request->attribute('token'))) {
if (! hash_equals(Configuration::getConfig('authentication', 'token'), $token)) {
return new Response(render(__DIR__ . '/../templates/token.html.php', [
'message' => 'Invalid token',
'message' => 'Invalid token',
'token' => $token,
]), 401);
}
$request = $request->withAttribute('token', $token);
return $next($request);
}
}

View File

@@ -186,6 +186,7 @@ section li {
margin-left: 1em;
}
.bridge-card {
position: relative;
text-align: center;
}

View File

@@ -13,8 +13,8 @@
<?= e($message) ?>
</p>
<form action="" method="get">
<form action="" method="get" autocomplete="off">
<label for="token">Token:</label>
<input type="password" name="token" id="token" placeholder="token">
<input type="text" name="token" id="token" placeholder="token" value="<?= e($token) ?>">
<input type="submit" value="OK">
</form>

View File

@@ -183,4 +183,83 @@ class FeedParserTest extends TestCase
];
$this->assertEquals($expected, $feed);
}
/**
 * Parses a YouTube Atom feed that uses the "yt" and "media" namespaces and
 * checks that namespaced child elements are exposed per item, keyed by
 * namespace prefix, with nested elements (media:group, media:community)
 * represented as nested arrays.
 */
public function testYoutubeMediaModule()
{
$xml = <<<XML
<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns:yt="http://www.youtube.com/xml/schemas/2015" xmlns:media="http://search.yahoo.com/mrss/" xmlns="http://www.w3.org/2005/Atom">
<link rel="self" href="http://www.youtube.com/feeds/videos.xml?channel_id=UCuCkxoKLYO_EQ2GeFtbM_bw"/>
<id>yt:channel:uCkxoKLYO_EQ2GeFtbM_bw</id>
<yt:channelId>uCkxoKLYO_EQ2GeFtbM_bw</yt:channelId>
<title>Half as Interesting</title>
<link rel="alternate" href="https://www.youtube.com/channel/UCuCkxoKLYO_EQ2GeFtbM_bw"/>
<author>
<name>Half as Interesting</name>
<uri>https://www.youtube.com/channel/UCuCkxoKLYO_EQ2GeFtbM_bw</uri>
</author>
<published>2017-08-26T20:06:05+00:00</published>
<entry>
<id>yt:video:Upjg7F28DJw</id>
<yt:videoId>Upjg7F28DJw</yt:videoId>
<yt:channelId>UCuCkxoKLYO_EQ2GeFtbM_bw</yt:channelId>
<title>The Nuke-Proof US Military Base in a Mountain</title>
<link rel="alternate" href="https://www.youtube.com/watch?v=Upjg7F28DJw"/>
<author>
<name>Half as Interesting</name>
<uri>https://www.youtube.com/channel/UCuCkxoKLYO_EQ2GeFtbM_bw</uri>
</author>
<published>2025-01-24T15:44:18+00:00</published>
<updated>2025-01-25T06:55:19+00:00</updated>
<media:group>
<media:title>The Nuke-Proof US Military Base in a Mountain</media:title>
<media:content url="https://www.youtube.com/v/Upjg7F28DJw?version=3" type="application/x-shockwave-flash" width="640" height="390"/>
<media:thumbnail url="https://i2.ytimg.com/vi/Upjg7F28DJw/hqdefault.jpg" width="480" height="360"/>
<media:description>Receive 10% off anything on bellroy.com: https://bit.ly/3HdOWu9</media:description>
<media:community>
<media:starRating count="10157" average="5.00" min="1" max="5"/>
<media:statistics views="228462"/>
</media:community>
</media:group>
</entry>
</feed>
XML;
$feed = $this->sut->parseFeed($xml);
$expected = [
'title' => 'Half as Interesting',
'uri' => 'https://www.youtube.com/channel/UCuCkxoKLYO_EQ2GeFtbM_bw',
'icon' => null,
'items' => [
[
'uri' => 'https://www.youtube.com/watch?v=Upjg7F28DJw',
'title' => 'The Nuke-Proof US Military Base in a Mountain',
'content' => '',
// Unix time of the entry's <updated> value (2025-01-25T06:55:19+00:00)
'timestamp' => 1737788119,
'author' => 'Half as Interesting',
'id' => 'yt:video:Upjg7F28DJw',
'published' => '2025-01-24T15:44:18+00:00',
'updated' => '2025-01-25T06:55:19+00:00',
'link' => '',
'yt' => [
'videoId' => 'Upjg7F28DJw',
'channelId' => 'UCuCkxoKLYO_EQ2GeFtbM_bw',
],
'media' => [
'group' => [
'title' => 'The Nuke-Proof US Military Base in a Mountain',
// Elements that carry only attributes have no text value, so they are expected as ''
'content' => '',
'thumbnail' => '',
'description' => 'Receive 10% off anything on bellroy.com: https://bit.ly/3HdOWu9',
'community' => [
'starRating' => '',
'statistics' => '',
],
],
],
]
],
];
$this->assertEquals($expected, $feed);
}
}

View File

@@ -36,6 +36,12 @@ class UrlTest extends TestCase
}
}
/**
 * A URL whose path begins with two forward slashes must be rejected
 * with a UrlException when it is parsed.
 */
public function testIllegalPath()
{
    $urlWithDoubleSlashPath = 'https://example.com//foo';

    $this->expectException(\UrlException::class);

    Url::fromString($urlWithDoubleSlashPath);
}
public function testMutation()
{
$this->assertSame('http://example.com/foo', (Url::fromString('http://example.com/'))->withPath('/foo')->__toString());