1
0
mirror of https://github.com/RSS-Bridge/rss-bridge.git synced 2025-10-26 21:11:30 +01:00
Files
php-rss-bridge/bridges/TldrTechBridge.php
Bryan Wall d9f0ef1617 [TldrTechBridge] Rename Webdev and add new channels
TLDR has renamed the 'Webdev' channel to 'Dev' and Webdev has stopped working in the bridge. This PR makes the applicable change.

They have also added new channels for 'Fintech' and 'Data' and 'Coming Soon' channels for 'IT' and 'Hardware'. The URLs and signup pages for the 'Coming Soon' channels exist but they haven't started publishing articles yet. This PR adds options for all of these new channels.
2025-10-24 18:59:56 +02:00

144 lines
4.8 KiB
PHP

<?php
declare(strict_types=1);
/**
 * Bridge that turns TLDR newsletter issues (https://tldr.tech/) into feed items.
 *
 * For the selected topic it collects the newest issue plus older issues listed
 * on the topic's archive page, cleaning up each issue page's markup before it
 * is stored as item content.
 */
class TldrTechBridge extends BridgeAbstract
{
    const MAINTAINER = 'sqrtminusone';
    const NAME = 'TLDR Tech Newsletter Bridge';
    const URI = 'https://tldr.tech/';
    const DESCRIPTION = 'Return newsletter articles from TLDR Tech';
    const PARAMETERS = [
        '' => [
            'limit' => [
                'name' => 'Maximum number of articles to return',
                'type' => 'number',
                'required' => true,
                'defaultValue' => 10
            ],
            'topic' => [
                'name' => 'Topic',
                'type' => 'list',
                'values' => [
                    'Tech' => 'tech',
                    'Dev' => 'dev',
                    'AI' => 'ai',
                    'Information Security' => 'infosec',
                    'Product Management' => 'product',
                    'DevOps' => 'devops',
                    'Crypto' => 'crypto',
                    'Design' => 'design',
                    'Marketing' => 'marketing',
                    'Founders' => 'founders',
                    'Fintech' => 'fintech',
                    'Data' => 'data',
                    'IT' => 'it',
                    'Hardware' => 'hardware',
                ],
                'defaultValue' => 'tech'
            ]
        ]
    ];

    /**
     * Collect the newest issue and then older issues from the archive page.
     *
     * The newest issue URL is taken from the `Location` response header of
     * `api/latest/<topic>`. Older issues come from the `<a>` children of the
     * archive listing and are added until `limit` items have been collected.
     *
     * @throws \Exception when the archive listing cannot be found
     */
    public function collectData()
    {
        $topic = $this->getInput('topic');
        $limit = $this->getInput('limit');

        // Without a Location header there is no "latest" shortcut; the archive
        // listing below is then the only source of items.
        $latestUrl = null;
        $response = getContents(self::URI . 'api/latest/' . $topic, [], [], true);
        $location = $response->getHeader('Location');
        if (is_string($location) && $location !== '') {
            $latestUrl = Url::fromString($location);
            $this->extractItem($latestUrl);
        }

        $archivesHtml = getSimpleHTMLDOM(self::URI . $topic . '/archives');
        $entriesRoot = $archivesHtml ? $archivesHtml->find('div.content-center.mt-5', 0) : null;
        if (!$entriesRoot) {
            throw new \Exception('Could not find archive entries');
        }

        foreach ($entriesRoot->children() as $child) {
            // Check the limit before fetching anything: the newest issue may
            // already have filled it (e.g. limit = 1).
            if (count($this->items) >= $limit) {
                break;
            }
            if ($child->tag !== 'a') {
                continue;
            }
            $href = $child->href;
            if (!is_string($href) || $href === '') {
                // Anchor without a usable href; nothing to fetch.
                continue;
            }
            $itemUrl = Url::fromString(self::URI . ltrim($href, '/'));
            // `==` on objects compares property values, so this skips the
            // archive entry that points at the already-collected newest issue.
            if ($latestUrl !== null && $itemUrl == $latestUrl) {
                continue;
            }
            $this->extractItem($itemUrl);
        }
    }

    /**
     * Fetch one issue page and append it to the feed items.
     *
     * The timestamp is parsed with strtotime() from the last path segment of
     * the URL. Pages that fail with an HttpException are skipped silently.
     *
     * @param Url $url URL of the issue page
     */
    private function extractItem(Url $url): void
    {
        $pathParts = explode('/', $url->getPath());
        $date = strtotime(end($pathParts));

        try {
            [$content, $title] = $this->extractContent($url);
        } catch (HttpException $e) {
            // archive occasionally returns broken URLs
            return;
        }

        $this->items[] = [
            'uri' => (string) $url,
            'title' => $title,
            'timestamp' => $date,
            'content' => $content,
        ];
    }

    /**
     * Load an issue page and return its cleaned-up content and title.
     *
     * Cleanup steps: drop the subscribe form and privacy-link blocks, replace
     * section headers with styled `<h3>` elements, render bold links as `<b>`,
     * add spacing to nested `<div>` blocks and remove empty `<section>`s.
     *
     * @param Url $url URL of the issue page
     *
     * @return array{0: string, 1: string} inner HTML of the content container and the issue title
     *
     * @throws \Exception when the content container is missing
     */
    private function extractContent(Url $url): array
    {
        $html = getSimpleHTMLDOMCached($url);
        $content = $html->find('div.content-center.mt-5', 0);
        if (!$content) {
            throw new \Exception('Could not find content');
        }

        $subscribeForm = $content->find('div.mt-5 > div > form', 0);
        if ($subscribeForm) {
            $content->removeChild($subscribeForm->parent->parent);
        }

        $privacyLink = $content->find("a[href='/privacy']", 0);
        if ($privacyLink) {
            $content->removeChild($privacyLink->parent->parent);
        }

        foreach ($content->find('h6.text-center.font-bold') as $header) {
            $headerRoot = $header->parent;
            // Capture the text before the children are removed below.
            $heading = $html->createElement('h3', $headerRoot->plaintext);
            $heading->style = 'margin-top: 1.2em; margin-bottom: 0.5em;';
            foreach ($headerRoot->children() as $child) {
                $headerRoot->removeChild($child);
            }
            $headerRoot->appendChild($heading);
        }

        foreach ($content->find('a.font-bold') as $a) {
            $a->removeAttribute('class');
            $bold = $html->createElement('b', $a->plaintext);
            $a->removeChild($a->firstChild());
            $a->appendChild($bold);
        }

        foreach ($content->children() as $child) {
            if ($child->tag !== 'div') {
                continue;
            }
            foreach ($child->children() as $grandchild) {
                if ($grandchild->tag === 'div') {
                    $grandchild->style = 'margin-bottom: 12px;';
                }
            }
        }

        foreach ($content->find('section') as $section) {
            if (count($section->children()) === 0) {
                $content->removeChild($section);
            }
        }

        // Fall back to the issue URL when the page has no <h2>, rather than
        // dereferencing a missing node.
        $titleNode = $content->find('h2', 0);
        $title = $titleNode ? $titleNode->plaintext : (string) $url;

        return [$content->innertext, $title];
    }
}