<?php

/**
 * Bridge for heise online that expands the publisher's Atom feeds.
 *
 * For every feed item the linked article page is downloaded (as a single page via
 * "?seite=all") and the item's content, author and categories are rebuilt from the
 * article markup. Items that do not point to www.heise.de, and heise+ items when no
 * session cookie is configured, are passed through unchanged.
 */
class HeiseBridge extends FeedExpander
{
    const MAINTAINER = 'Dreckiger-Dan';
    const NAME = 'Heise Online Bridge';
    const URI = 'https://heise.de/';
    const CACHE_TIMEOUT = 1800; // 30min
    const DESCRIPTION = 'Returns the full articles instead of only the intro';
    const PARAMETERS = [[
        'category' => [
            'name' => 'Category',
            'type' => 'list',
            'values' => [
                // source: https://www.heise.de/news-extern/news.html
                'heise online News'
                    => 'https://www.heise.de/rss/heise-atom.xml',
                'heise online IT'
                    => 'https://www.heise.de/rss/heise-Rubrik-IT-atom.xml',
                'heise online Wissen'
                    => 'https://www.heise.de/rss/heise-Rubrik-Wissen-atom.xml',
                'heise online Mobiles'
                    => 'https://www.heise.de/rss/heise-Rubrik-Mobiles-atom.xml',
                'heise online Entertainment'
                    => 'https://www.heise.de/rss/heise-Rubrik-Entertainment-atom.xml',
                'heise online Netzpolitik'
                    => 'https://www.heise.de/rss/heise-Rubrik-Netzpolitik-atom.xml',
                'heise online Wirtschaft'
                    => 'https://www.heise.de/rss/heise-Rubrik-Wirtschaft-atom.xml',
                'heise online Journal'
                    => 'https://www.heise.de/rss/heise-Rubrik-Journal-atom.xml',
                'heise online Top-News'
                    => 'https://www.heise.de/rss/heise-top-atom.xml',
                //'iMonitor – Internet-Störungen'
                //=> 'https://www.heise.de/netze/netzwerk-tools/imonitor-internet-stoerungen/feed/aktuelle-meldungen/',
                //'heise tipps+tricks 🦄💻📱'
                //=> 'https://www.heise.de/rss/tipps-und-tricks-atom.xml',
                'Alle Inhalte von heise+'
                    => 'https://www.heise.de/rss/heiseplus-atom.xml',
                'heise Autos News'
                    => 'https://www.heise.de/autos/rss/news-atom.xml',
                'heise Developer - Neueste Meldungen'
                    => 'https://www.heise.de/developer/rss/news-atom.xml',
                'Der Dotnet-Doktor'
                    => 'https://www.heise.de/developer/rss/dotnet-doktor-blog-atom.xml',
                'the next big thing'
                    => 'https://www.heise.de/developer/rss/next-big-thing-blog-atom.xml',
                'Tales from the Web side'
                    => 'https://www.heise.de/developer/rss/tales-from-the-web-side-blog-atom.xml',
                'Continuous Architecture'
                    => 'https://www.heise.de/developer/rss/continuous-architecture-blog-atom.xml',
                'Der Pragmatische Architekt'
                    => 'https://www.heise.de/developer/rss/der-pragmatische-architekt-blog-atom.xml',
                'Modernes C++'
                    => 'https://www.heise.de/developer/rss/modernes-cplusplus-blog-atom.xml',
                'colspan'
                    => 'https://www.heise.de/developer/rss/colspan-dev-blog-atom.xml',
                '"Ich roll\' dann mal aus"'
                    => 'https://www.heise.de/developer/rss/ich-roll-dann-mal-aus-atom.xml',
                'Well Organized'
                    => 'https://www.heise.de/developer/rss/well-organized-blog-atom.xml',
                'Neuigkeiten von der Insel'
                    => 'https://www.heise.de/developer/rss/neuigkeiten-von-der-insel-blog-atom.xml',
                'Von Menschen und Maschinen'
                    => 'https://www.heise.de/developer/rss/von-menschen-und-maschinen-blog-atom.xml',
                'heise Foto'
                    => 'https://www.heise.de/foto/rss/news-atom.xml',
                //'Top-Programme bei heise Download'
                //=> 'https://www.heise.de/download/feed/top',
                'heise Security'
                    => 'https://www.heise.de/security/rss/news-atom.xml',
                'Security-Alert Meldungen'
                    => 'https://www.heise.de/security/rss/alert-news-atom.xml',
                'c\'t-Blog'
                    => 'https://www.heise.de/ct/blog/blog-atom.xml',
                'c\'t-Blog Labs'
                    => 'https://www.heise.de/ct/blog/blog-ctlabs-atom.xml',
                'c\'t-Blog Fair & Green IT'
                    => 'https://www.heise.de/ct/blog/blog-fgit-atom.xml',
                'c\'t-Blog RTFM'
                    => 'https://www.heise.de/ct/blog/blog-rtfm-atom.xml',
                'c\'t-Themen'
                    => 'https://www.heise.de/ct/rss/artikel-atom.xml',
                'Make - Neueste Meldungen'
                    => 'https://www.heise.de/make/rss/hardware-hacks-atom.xml',
                'iX News'
                    => 'https://www.heise.de/ix/rss/news-atom.xml',
                'Mac & i'
                    => 'https://www.heise.de/mac-and-i/news-atom.xml',
                'MIT Technology Review'
                    => 'https://www.heise.de/tr/rss/news-atom.xml',
                'MIT Technology Review Blog'
                    => 'https://www.heise.de/tr/rss/blog-atom.xml',
                //'Telepolis'
                //=> 'https://www.heise.de/tp/news-atom.xml',
                //'Aktuelle News von TechStage'
                //=> 'https://www.techstage.de/rss.xml',
            ]
        ],
        'limit' => [
            'name' => 'Limit',
            'type' => 'number',
            'required' => false,
            'title' => 'Specify number of full articles to return',
            'defaultValue' => 5
        ],
        'sessioncookie' => [
            'name' => 'Session Cookie',
            'required' => false,
            'title' => <<<'TITLE'
                If you have a heise+ subscription,
                you can enter your cookie (ssohls) here to
                have heise+ articles displayed in full.
                By default the cookie is 1 year valid.
                TITLE,
        ]
    ]];

    // Fallback number of items when the "limit" input is empty or 0.
    const LIMIT = 5;

    /**
     * Expands the feed selected via the "category" input, capped at the "limit"
     * input (falling back to self::LIMIT when that input is empty or 0).
     */
    public function collectData()
    {
        $this->collectExpandableDatas(
            $this->getInput('category'),
            $this->getInput('limit') ?: static::LIMIT
        );
    }

    /**
     * Enriches one feed item with the content of its article page.
     *
     * @param array $item Feed item; its 'uri' and 'title' entries are read here.
     * @return array The item, either enriched with the full article or unchanged
     *               (apart from the stripped query string) when it is skipped.
     */
    protected function parseItem(array $item)
    {
        // Normalise to a string so that "not configured" is exactly ''.
        $sessioncookie = trim((string) $this->getInput('sessioncookie'));

        // strip rss parameter
        $item['uri'] = explode('?', $item['uri'])[0];

        // ignore TechStage articles
        if (!str_starts_with($item['uri'], 'https://www.heise.de')) {
            return $item;
        }

        // abort on heise+ articles
        if ($sessioncookie === '' && str_starts_with((string) ($item['title'] ?? ''), 'heise+ |')) {
            return $item;
        }

        // request the whole article on a single page
        $item['uri'] .= '?seite=all';

        // Only send the cookie header when there is a cookie to send.
        $headers = $sessioncookie === '' ? [] : ['cookie: ssohls=' . $sessioncookie];
        $article = getSimpleHTMLDOM($item['uri'], $headers);

        if ($article) {
            $article = defaultLinkTo($article, $item['uri']);
            $item = $this->addArticleToItem($item, $article);
        }

        return $item;
    }

    /**
     * Rebuilds content, author and categories of a feed item from the article DOM.
     *
     * @param array $item    Feed item to enrich; 'uri' is used as base for relinking.
     * @param mixed $article Parsed article document as returned by getSimpleHTMLDOM().
     * @return array The enriched item. If the cleaned-up markup cannot be re-parsed,
     *               the item is returned as it was passed in.
     */
    private function addArticleToItem($item, $article)
    {
        // relink URIs, as the previous a-img tags weren't recognized by this function
        $article = defaultLinkTo($article, $item['uri']);

        // remove unwanted stuff
        // (built with implode so the selector list contains no stray line breaks)
        $unwantedSelector = implode(', ', [
            'figure.branding',
            'figure.a-inline-image',
            'a-ad',
            'div.ho-text',
            'a-img',
            '.a-toc__list',
            'a-collapse',
            '.opt-in__description',
            '.opt-in__footnote',
        ]);
        foreach ($article->find($unwantedSelector) as $element) {
            $element->remove();
        }
        foreach ($article->find('img') as $element) {
            // a missing alt attribute is treated as an empty string
            if (str_contains((string) $element->alt, 'l+f')) {
                $element->remove();
            }
        }

        // reload html, as remove() is buggy
        $article = str_get_html($article->outertext);
        if (!$article) {
            // nothing parseable left; keep the item as delivered by the feed
            return $item;
        }

        $header = $article->find('header.a-article-header', 0);
        if ($header) {
            $item['content'] = implode('', $header->find('p, figure img, noscript img'));

            $authors = $header->find('.creator__names .creator__name');
            if ($authors) {
                $item['author'] = implode(', ', array_map(
                    static fn ($author) => $author->plaintext,
                    $authors
                ));
            }
        }

        // fix for embedded youtube videos
        $needsReload = false;
        $previousUrl = '';
        foreach ($article->find('div.video__yt-container') as $ytvideo) {
            // capture the video URL up to (but not including) the closing quote
            if (!preg_match('/(www\.youtube.*?)"/', $ytvideo->innertext, $match)) {
                continue;
            }
            $videoUrl = $match[1];
            if ($videoUrl === $previousUrl) {
                continue;
            }

            // save link to prevent duplicates
            $previousUrl = $videoUrl;
            $ytiframe = <<<EOT
                <iframe width="560" height="315" src="https://{$videoUrl}" title="YouTube video player" frameborder="0"
                allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share"
                referrerpolicy="strict-origin-when-cross-origin" allowfullscreen></iframe>
                EOT;

            // check if video is in header or article for correct positioning
            if ($header && str_contains($header->innertext, $videoUrl . '"')) {
                $item['content'] = ($item['content'] ?? '') . $ytiframe;
            } else {
                $ytvideo->innertext .= $ytiframe;
                $needsReload = true;
            }
        }

        if ($needsReload) {
            // re-parse so the injected iframes become part of the DOM
            $reloaded = str_get_html($article->outertext);
            if ($reloaded) {
                $article = $reloaded;
            }
        }

        $categories = $article->find('.article-footer__topics ul.topics li.topics__item a-topic a');
        foreach ($categories as $category) {
            $item['categories'][] = trim($category->plaintext);
        }

        $content = $article->find('.article-content', 0);
        if ($content) {
            $contentSelector = implode(', ', [
                'p',
                'h3',
                'ul',
                'ol',
                'table',
                'pre',
                'noscript img',
                'a-bilderstrecke h2',
                'a-bilderstrecke figure',
                'a-bilderstrecke figcaption',
                'noscript iframe',
            ]);
            // append to whatever content exists (header excerpt or feed summary)
            $item['content'] = ($item['content'] ?? '') . implode('', $content->find($contentSelector));
        }

        return $item;
    }
}
|