2021-01-29 18:27:35 +00:00
|
|
|
<?php
|
|
|
|
|
2022-07-01 15:10:30 +02:00
|
|
|
/**
 * Bridge that turns articles from the F-Secure blog into feed items.
 *
 * Articles are collected per category: either the categories listed in the
 * `categories` input (comma separated) or, when that input is empty, every
 * category linked from the header menu of the blog's front page.
 */
class FSecureBlogBridge extends BridgeAbstract
{
    const NAME = 'F-Secure Blog';
    const URI = 'https://blog.f-secure.com';
    const DESCRIPTION = 'F-Secure Blog';
    const MAINTAINER = 'simon816';
    const PARAMETERS = [
        '' => [
            'categories' => [
                'name' => 'Blog categories',
                'exampleValue' => 'home-security',
            ],
            'language' => [
                'name' => 'Language',
                'required' => true,
                'defaultValue' => 'en',
            ],
            'oldest_date' => [
                'name' => 'Oldest article date',
                'exampleValue' => '-6 months',
            ],
        ]
    ];

    /**
     * Article URLs that were already turned into items, keyed by URL.
     * Used to avoid collecting the same article twice across categories.
     *
     * @var array
     */
    private $seen = [];

    /**
     * Unix timestamp cutoff taken from the `oldest_date` input; articles
     * published before it are not collected. 0 when no usable date was given.
     *
     * @var int
     */
    private $oldest = 0;

    /**
     * Base URI of the blog for the selected language.
     *
     * English lives at the site root; any other language is a sub-path.
     *
     * @return string
     */
    public function getURI()
    {
        // `?:` rather than `or`: `or` binds looser than `=`, so with
        // `$lang = ... or 'en'` the fallback was never actually assigned.
        $lang = $this->getInput('language') ?: 'en';
        if ($lang === 'en') {
            return self::URI;
        }
        return self::URI . "/$lang";
    }

    /**
     * Fill $this->items with articles from the requested categories, or from
     * every category found in the front page header menu when none was given.
     */
    public function collectData()
    {
        $this->items = [];
        $this->seen = [];

        // Guard against null so strtotime() is never called without a string.
        $oldestInput = $this->getInput('oldest_date');
        $this->oldest = $oldestInput === null ? 0 : (strtotime($oldestInput) ?: 0);

        $categories = $this->getInput('categories');
        if (!empty($categories)) {
            foreach (explode(',', $categories) as $cat) {
                // Tolerate "a, b" style lists; skip blank entries.
                $cat = trim($cat);
                if ($cat !== '') {
                    $this->collectCategory($cat);
                }
            }
            return;
        }

        $html = getSimpleHTMLDOMCached($this->getURI() . '/');

        foreach ($html->find('ul.c-header-menu-desktop__list li a') as $link) {
            // parse_url() yields null/false when the href has no usable path.
            $path = parse_url((string) $link->href, PHP_URL_PATH);
            if (!is_string($path)) {
                continue;
            }
            $pos = strpos($path, '/category/');
            if ($pos === false) {
                continue;
            }
            // Strip a trailing slash only if one is present, instead of
            // unconditionally dropping the last character.
            $cat = rtrim(substr($path, $pos + strlen('/category/')), '/');
            if ($cat !== '') {
                $this->collectCategory($cat);
            }
        }
    }

    /**
     * Walk the listing pages of one category, following "next" links.
     *
     * @param string $category Category slug as it appears in the blog URL.
     */
    private function collectCategory($category)
    {
        $url = $this->getURI() . "/category/$category/";
        // Limit total amount of requests: stop paging once 20 items exist.
        while ($url && count($this->items) < 20) {
            $url = $this->collectListing($url);
        }
    }

    /**
     * Collect every article on one listing page.
     *
     * n.b. this relies on articles to be ordered by date so the cutoff works.
     *
     * @param string $url Listing page URL.
     * @return string|null URL of the next listing page, or null when there is
     *                     none or an article older than the cutoff was reached.
     */
    private function collectListing($url)
    {
        // NOTE(review): second argument is presumably the cache lifetime in seconds - confirm.
        $html = getSimpleHTMLDOMCached($url, 60 * 60);
        $items = $html->find('section.b-blog .l-blog__content__listing div.c-listing-item');

        foreach ($items as $item) {
            $articleUrl = $item->getAttribute('data-url');
            if (empty($articleUrl)) {
                // Listing entry without a target; nothing to fetch.
                continue;
            }
            if (!$this->collectArticle($articleUrl)) {
                return null; // Too old, stop collecting
            }
        }

        // link[rel=next] points to a 404 on the non-English blogs, so the
        // pagination widget is used to find the next page instead.
        $next = $html->find('ul.page-numbers a.next', 0);
        return $next ? $next->href : null;
    }

    /**
     * Fetch one article and append it to $this->items.
     *
     * Elements missing from the page are skipped rather than dereferenced.
     * An article whose publication date cannot be read is kept without a
     * timestamp and does not trigger the cutoff.
     *
     * @param string $url Article URL.
     * @return bool Whether to continue collecting articles, i.e. false only
     *              when the article's date is before the oldest cutoff.
     */
    private function collectArticle($url)
    {
        if (array_key_exists($url, $this->seen)) {
            return true;
        }
        $html = getSimpleHTMLDOMCached($url);

        $rssItem = [ 'uri' => $url, 'uid' => $url ];

        $titleMeta = $html->find('meta[property=og:title]', 0);
        if ($titleMeta) {
            $rssItem['title'] = $titleMeta->content;
        }

        $dateMeta = $html->find('meta[property=article:published_time]', 0);
        $dt = $dateMeta ? $dateMeta->content : null;
        $published = $dt ? strtotime($dt) : false;
        if ($published !== false) {
            // Exit if too old
            if ($published < $this->oldest) {
                return false;
            }
            $rssItem['timestamp'] = $dt;
        }

        $img = $html->find('meta[property=og:image]', 0);
        $rssItem['enclosures'] = $img ? [$img->content] : [];

        $authorLink = $html->find('.c-blog-author__text a', 0);
        if ($authorLink) {
            $rssItem['author'] = trim($authorLink->plaintext);
        }

        $rssItem['categories'] = array_map(static function ($link) {
            return trim($link->plaintext);
        }, $html->find('.b-single-header__categories .c-category-list a'));

        $article = $html->find('article', 0);
        $rssItem['content'] = $article ? trim($article->innertext) : '';

        $this->items[] = $rssItem;
        $this->seen[$url] = 1;
        return true;
    }
}
|