From 382648fc22c232bc8c66111fc4d3ab6570946437 Mon Sep 17 00:00:00 2001 From: Dag Date: Fri, 13 Oct 2023 00:25:34 +0200 Subject: [PATCH] refactor: FeedExpander::parseItem() descendants (#3744) --- bridges/AcrimedBridge.php | 13 +++-- bridges/ArsTechnicaBridge.php | 4 +- bridges/BleepingComputerBridge.php | 12 ++--- bridges/CNETFranceBridge.php | 7 ++- bridges/CaschyBridge.php | 4 +- bridges/CommonDreamsBridge.php | 4 +- bridges/CourrierInternationalBridge.php | 6 +-- bridges/DarkReadingBridge.php | 5 +- bridges/DauphineLibereBridge.php | 4 +- bridges/DeveloppezDotComBridge.php | 36 +++++++------ bridges/EconomistBridge.php | 15 +++--- bridges/EngadgetBridge.php | 20 +++---- bridges/EsquerdaNetBridge.php | 38 +++++++------ bridges/FeedExpanderExampleBridge.php | 5 -- bridges/FeedExpanderTestBridge.php | 23 ++++++++ bridges/FilterBridge.php | 4 +- bridges/ForGifsBridge.php | 10 ++-- bridges/FreeCodeCampBridge.php | 14 ++--- bridges/FuturaSciencesBridge.php | 11 ++-- bridges/HardwareInfoBridge.php | 10 ++-- bridges/HeiseBridge.php | 5 +- bridges/IGNBridge.php | 11 ++-- bridges/LeMondeInformatiqueBridge.php | 5 +- bridges/ListverseBridge.php | 9 ++-- bridges/MediapartBridge.php | 14 +++-- bridges/MsnMondeBridge.php | 12 +++-- bridges/NYTBridge.php | 11 ++-- bridges/NextInpactBridge.php | 5 +- bridges/NextgovBridge.php | 3 +- bridges/NiceMatinBridge.php | 5 +- bridges/NyaaTorrentsBridge.php | 8 +-- bridges/OnVaSortirBridge.php | 16 +++--- bridges/PhoronixBridge.php | 17 +++--- bridges/QwantzBridge.php | 15 +++--- bridges/RaceDepartmentBridge.php | 7 ++- bridges/ScribbleHubBridge.php | 18 +++---- bridges/SplCenterBridge.php | 11 ++-- bridges/TapasBridge.php | 30 +++++------ bridges/TheGuardianBridge.php | 13 ++--- bridges/TwitterEngineeringBridge.php | 24 ++++----- bridges/VarietyBridge.php | 6 +-- bridges/ViceBridge.php | 10 ++-- bridges/WiredBridge.php | 9 ++-- bridges/WordPressBridge.php | 72 ++++++++++++------------- bridges/WorldOfTanksBridge.php | 4 +- bridges/ZeitBridge.php | 10 ++-- 46 files changed, 314 insertions(+), 281 deletions(-) create mode 100644 bridges/FeedExpanderTestBridge.php diff --git a/bridges/AcrimedBridge.php b/bridges/AcrimedBridge.php index d37f3ce4..93890f35 100644 --- a/bridges/AcrimedBridge.php +++ b/bridges/AcrimedBridge.php @@ -20,17 +20,16 @@ class AcrimedBridge extends FeedExpander public function collectData() { - $this->collectExpandableDatas( - static::URI . 'spip.php?page=backend', - $this->getInput('limit') - ); + $url = 'https://www.acrimed.org/spip.php?page=backend'; + $limit = $this->getInput('limit'); + $this->collectExpandableDatas($url, $limit); } - protected function parseItem($newsItem) + protected function parseItem($item) { - $item = parent::parseItem($newsItem); + $item = parent::parseItem($item); - $articlePage = getSimpleHTMLDOM($newsItem->link); + $articlePage = getSimpleHTMLDOM($item['uri']); $article = sanitize($articlePage->find('article.article1', 0)->innertext); $article = defaultLinkTo($article, static::URI); $item['content'] = $article; diff --git a/bridges/ArsTechnicaBridge.php b/bridges/ArsTechnicaBridge.php index 1e3e6379..98e5566b 100644 --- a/bridges/ArsTechnicaBridge.php +++ b/bridges/ArsTechnicaBridge.php @@ -33,9 +33,9 @@ class ArsTechnicaBridge extends FeedExpander $this->collectExpandableDatas($url); } - protected function parseItem($newItem) + protected function parseItem($item) { - $item = parent::parseItem($newItem); + $item = parent::parseItem($item); $item_html = getSimpleHTMLDOMCached($item['uri'] . '&'); $item_html = defaultLinkTo($item_html, self::URI); diff --git a/bridges/BleepingComputerBridge.php b/bridges/BleepingComputerBridge.php index c1d3d568..bad78561 100644 --- a/bridges/BleepingComputerBridge.php +++ b/bridges/BleepingComputerBridge.php @@ -7,6 +7,12 @@ class BleepingComputerBridge extends FeedExpander const URI = 'https://www.bleepingcomputer.com/'; const DESCRIPTION = 'Returns the newest articles.'; + public function collectData() + { + $feed = static::URI . 'feed/'; + $this->collectExpandableDatas($feed); + } + protected function parseItem($item) { $item = parent::parseItem($item); @@ -23,10 +29,4 @@ class BleepingComputerBridge extends FeedExpander return $item; } - - public function collectData() - { - $feed = static::URI . 'feed/'; - $this->collectExpandableDatas($feed); - } } diff --git a/bridges/CNETFranceBridge.php b/bridges/CNETFranceBridge.php index da808596..d6a766de 100644 --- a/bridges/CNETFranceBridge.php +++ b/bridges/CNETFranceBridge.php @@ -43,9 +43,9 @@ class CNETFranceBridge extends FeedExpander $this->collectExpandableDatas('https://www.cnetfrance.fr/feeds/rss/news/'); } - protected function parseItem($feedItem) + protected function parseItem($item) { - $item = parent::parseItem($feedItem); + $item = parent::parseItem($item); foreach ($this->bannedTitle as $term) { if (preg_match('/' . $term . '/mi', $item['title']) === 1) { @@ -54,8 +54,7 @@ class CNETFranceBridge extends FeedExpander } foreach ($this->bannedURL as $term) { - $preg_match = preg_match('#' . $term . '#mi', $item['uri']); - if ($preg_match === 1) { + if (preg_match('#' . $term . '#mi', $item['uri'])) { return null; } } diff --git a/bridges/CaschyBridge.php b/bridges/CaschyBridge.php index 5f463852..7d632bf6 100644 --- a/bridges/CaschyBridge.php +++ b/bridges/CaschyBridge.php @@ -34,9 +34,9 @@ class CaschyBridge extends FeedExpander ); } - protected function parseItem($feedItem) + protected function parseItem($item) { - $item = parent::parseItem($feedItem); + $item = parent::parseItem($item); if (strpos($item['uri'], 'https://stadt-bremerhaven.de/') !== 0) { return $item; diff --git a/bridges/CommonDreamsBridge.php b/bridges/CommonDreamsBridge.php index 99580499..e1a185de 100644 --- a/bridges/CommonDreamsBridge.php +++ b/bridges/CommonDreamsBridge.php @@ -12,9 +12,9 @@ class CommonDreamsBridge extends FeedExpander $this->collectExpandableDatas('http://www.commondreams.org/rss.xml', 10); } - protected function parseItem($newsItem) + protected function parseItem($item) { - $item = parent::parseItem($newsItem); + $item = parent::parseItem($item); $item['content'] = $this->extractContent($item['uri']); return $item; } diff --git a/bridges/CourrierInternationalBridge.php b/bridges/CourrierInternationalBridge.php index fdbe2ea6..9e30fd51 100644 --- a/bridges/CourrierInternationalBridge.php +++ b/bridges/CourrierInternationalBridge.php @@ -13,11 +13,11 @@ class CourrierInternationalBridge extends FeedExpander $this->collectExpandableDatas(static::URI . 'feed/all/rss.xml', 20); } - protected function parseItem($feedItem) + protected function parseItem($item) { - $item = parent::parseItem($feedItem); + $item = parent::parseItem($item); - $articlePage = getSimpleHTMLDOMCached($feedItem->link); + $articlePage = getSimpleHTMLDOMCached($item['uri']); $content = $articlePage->find('.article-text, depeche-text', 0); if (!$content) { return $item; diff --git a/bridges/DarkReadingBridge.php b/bridges/DarkReadingBridge.php index 58087506..aca30490 100644 --- a/bridges/DarkReadingBridge.php +++ b/bridges/DarkReadingBridge.php @@ -56,9 +56,10 @@ class DarkReadingBridge extends FeedExpander $this->collectExpandableDatas($feed_url, $limit); } - protected function parseItem($newsItem) + protected function parseItem($item) { - $item = parent::parseItem($newsItem); + $item = parent::parseItem($item); + $article = getSimpleHTMLDOMCached($item['uri']); $item['content'] = $this->extractArticleContent($article); $item['enclosures'] = []; //remove author profile picture diff --git a/bridges/DauphineLibereBridge.php b/bridges/DauphineLibereBridge.php index 82323036..0ab808cd 100644 --- a/bridges/DauphineLibereBridge.php +++ b/bridges/DauphineLibereBridge.php @@ -43,9 +43,9 @@ class DauphineLibereBridge extends FeedExpander $this->collectExpandableDatas($url, 10); } - protected function parseItem($newsItem) + protected function parseItem($item) { - $item = parent::parseItem($newsItem); + $item = parent::parseItem($item); $item['content'] = $this->extractContent($item['uri']); return $item; } diff --git a/bridges/DeveloppezDotComBridge.php b/bridges/DeveloppezDotComBridge.php index d0d54d0a..9dcbc31a 100644 --- a/bridges/DeveloppezDotComBridge.php +++ b/bridges/DeveloppezDotComBridge.php @@ -163,19 +163,6 @@ class DeveloppezDotComBridge extends FeedExpander ] ]; - /** - * Return the RSS url for selected domain - */ - private function getRssUrl() - { - $domain = $this->getInput('domain'); - if (!empty($domain)) { - return 'https://' . $domain . self::DOMAIN . self::RSS_URL; - } - - return self::URI . self::RSS_URL; - } - /** * Grabs the RSS item from Developpez.com */ @@ -189,15 +176,14 @@ class DeveloppezDotComBridge extends FeedExpander * Parse the content of every RSS item. And will try to get the full article * pointed by the item URL intead of the default abstract. */ - protected function parseItem($newsItem) + protected function parseItem($item) { + $item = parent::parseItem($item); + if (count($this->items) >= $this->getInput('limit')) { return null; } - // This function parse each entry in the RSS with the default parse - $item = parent::parseItem($newsItem); - // There is a bug in Developpez RSS, coma are writtent as '~?' in the // title, so I have to fix it manually $item['title'] = $this->fixComaInTitle($item['title']); @@ -229,6 +215,19 @@ class DeveloppezDotComBridge extends FeedExpander return $item; } + /** + * Return the RSS url for selected domain + */ + private function getRssUrl() + { + $domain = $this->getInput('domain'); + if (!empty($domain)) { + return 'https://' . $domain . self::DOMAIN . self::RSS_URL; + } + + return self::URI . self::RSS_URL; + } + /** * Replace '~?' by a proper coma ',' */ @@ -334,6 +333,9 @@ class DeveloppezDotComBridge extends FeedExpander */ private function isHtmlTagNotTxt($txt) { + if ($txt === '') { + return false; + } $html = str_get_html($txt); return $html && $html->root && count($html->root->children) > 0; } diff --git a/bridges/EconomistBridge.php b/bridges/EconomistBridge.php index 9a73a852..0572ab8f 100644 --- a/bridges/EconomistBridge.php +++ b/bridges/EconomistBridge.php @@ -93,21 +93,22 @@ class EconomistBridge extends FeedExpander $limit = 30; } - $this->collectExpandableDatas('https://www.economist.com/' . $category . '/rss.xml', $limit); + $url = 'https://www.economist.com/' . $category . '/rss.xml'; + $this->collectExpandableDatas($url, $limit); } - protected function parseItem($feedItem) + protected function parseItem($item) { - $item = parent::parseItem($feedItem); - $html = getSimpleHTMLDOM($item['uri']); + $item = parent::parseItem($item); + $dom = getSimpleHTMLDOM($item['uri']); - $article = $html->find('#new-article-template', 0); + $article = $dom->find('#new-article-template', 0); if ($article == null) { - $article = $html->find('main', 0); + $article = $dom->find('main', 0); } if ($article) { $elem = $article->find('div', 0); - list($content, $audio_url) = $this->processContent($html, $elem); + list($content, $audio_url) = $this->processContent($dom, $elem); $item['content'] = $content; if ($audio_url != null) { $item['enclosures'] = [$audio_url]; diff --git a/bridges/EngadgetBridge.php b/bridges/EngadgetBridge.php index c219c0ff..3253cc2e 100644 --- a/bridges/EngadgetBridge.php +++ b/bridges/EngadgetBridge.php @@ -10,26 +10,28 @@ class EngadgetBridge extends FeedExpander public function collectData() { + $url = 'https://www.engadget.com/rss.xml'; $max = 10; - $this->collectExpandableDatas(static::URI . 'rss.xml', $max); + $this->collectExpandableDatas($url, $max); } - protected function parseItem($newsItem) + protected function parseItem($item) { - $item = parent::parseItem($newsItem); - $url = (string) $newsItem->link; - if (!$url) { + $item = parent::parseItem($item); + + $itemUrl = trim($item['uri']); + if (!$itemUrl) { return $item; } // todo: remove querystring tracking - $articlePage = getSimpleHTMLDOM($url); + $dom = getSimpleHTMLDOM($itemUrl); // figure contain's the main article image - $article = $articlePage->find('figure', 0); + $article = $dom->find('figure', 0); // .article-text has the actual article - foreach ($articlePage->find('.article-text') as $element) { + foreach ($dom->find('.article-text') as $element) { $article = $article . $element; } - $item['content'] = $article; + $item['content'] = $article ?? ''; return $item; } } diff --git a/bridges/EsquerdaNetBridge.php b/bridges/EsquerdaNetBridge.php index ffb4fd4e..64a6949f 100644 --- a/bridges/EsquerdaNetBridge.php +++ b/bridges/EsquerdaNetBridge.php @@ -1,5 +1,8 @@ getInput('feed'); - return self::URI . '/rss/' . $type; - } - - public function getIcon() - { - return 'https://www.esquerda.net/sites/default/files/favicon_0.ico'; - } - public function collectData() { parent::collectExpandableDatas($this->getURI()); } - protected function parseItem($newsItem) + protected function parseItem($item) { - # Fix Publish date - $badDate = $newsItem->pubDate; - preg_match('|(?P\d\d)/(?P\d\d)/(?P\d\d\d\d) - (?P\d\d):(?P\d\d)|', $badDate, $d); - $newsItem->pubDate = sprintf('%s-%s-%sT%s:%s', $d['year'], $d['month'], $d['day'], $d['hour'], $d['minute']); - $item = parent::parseItem($newsItem); - # Include all the content - $uri = $item['uri']; - $html = getSimpleHTMLDOMCached($uri); + $item = parent::parseItem($item); + + $html = getSimpleHTMLDOMCached($item['uri']); $content = $html->find('div#content div.content', 0); ## Fix author $authorHTML = $html->find('.field-name-field-op-author a', 0); @@ -72,4 +59,15 @@ class EsquerdaNetBridge extends FeedExpander $item['content'] = $content; return $item; } + + public function getURI() + { + $type = $this->getInput('feed'); + return self::URI . '/rss/' . $type; + } + + public function getIcon() + { + return 'https://www.esquerda.net/sites/default/files/favicon_0.ico'; + } } diff --git a/bridges/FeedExpanderExampleBridge.php b/bridges/FeedExpanderExampleBridge.php index 0e9ae386..f0af64f4 100644 --- a/bridges/FeedExpanderExampleBridge.php +++ b/bridges/FeedExpanderExampleBridge.php @@ -43,9 +43,4 @@ class FeedExpanderExampleBridge extends FeedExpander returnClientError('Unknown version ' . $this->getInput('version') . '!'); } } - - protected function parseItem($newsItem) - { - return (array) $newsItem; - } } diff --git a/bridges/FeedExpanderTestBridge.php b/bridges/FeedExpanderTestBridge.php new file mode 100644 index 00000000..9a6e7bb7 --- /dev/null +++ b/bridges/FeedExpanderTestBridge.php @@ -0,0 +1,23 @@ +collectExpandableDatas($url); + } +} diff --git a/bridges/FilterBridge.php b/bridges/FilterBridge.php index 1d920f90..3e3e812d 100644 --- a/bridges/FilterBridge.php +++ b/bridges/FilterBridge.php @@ -82,9 +82,9 @@ class FilterBridge extends FeedExpander $this->collectExpandableDatas($this->getURI()); } - protected function parseItem($newItem) + protected function parseItem($item) { - $item = parent::parseItem($newItem); + $item = parent::parseItem($item); // Generate title from first 50 characters of content? if ($this->getInput('title_from_content') && array_key_exists('content', $item)) { diff --git a/bridges/ForGifsBridge.php b/bridges/ForGifsBridge.php index 03848d04..e210124a 100644 --- a/bridges/ForGifsBridge.php +++ b/bridges/ForGifsBridge.php @@ -12,12 +12,12 @@ class ForGifsBridge extends FeedExpander $this->collectExpandableDatas('https://forgifs.com/gallery/srss/7'); } - protected function parseItem($feedItem) + protected function parseItem($item) { - $item = parent::parseItem($feedItem); + $item = parent::parseItem($item); - $content = str_get_html($item['content']); - $img = $content->find('img', 0); + $dom = str_get_html($item['content']); + $img = $dom->find('img', 0); $poster = $img->src; // The actual gif is the same path but its id must be decremented by one. @@ -34,7 +34,7 @@ class ForGifsBridge extends FeedExpander $img->width = 'auto'; $img->height = 'auto'; - $item['content'] = $content; + $item['content'] = (string) $dom; return $item; } diff --git a/bridges/FreeCodeCampBridge.php b/bridges/FreeCodeCampBridge.php index 89d8c53a..141746d2 100644 --- a/bridges/FreeCodeCampBridge.php +++ b/bridges/FreeCodeCampBridge.php @@ -14,15 +14,17 @@ class FreeCodeCampBridge extends FeedExpander $this->collectExpandableDatas('https://www.freecodecamp.org/news/rss/', 15); } - protected function parseItem($newsItem) + protected function parseItem($item) { - $item = parent::parseItem($newsItem); - // $articlePage gets the entire page's contents - $articlePage = getSimpleHTMLDOM($newsItem->link); + $item = parent::parseItem($item); + + $dom = getSimpleHTMLDOM($item['uri']); + // figure contain's the main article image - $article = $articlePage->find('figure', 0); + $article = $dom->find('figure', 0); + // the actual article - foreach ($articlePage->find('.post-full-content') as $element) { + foreach ($dom->find('.post-full-content') as $element) { $article = $article . $element; } $item['content'] = $article; diff --git a/bridges/FuturaSciencesBridge.php b/bridges/FuturaSciencesBridge.php index 3fb8aafa..cfb2d711 100644 --- a/bridges/FuturaSciencesBridge.php +++ b/bridges/FuturaSciencesBridge.php @@ -85,13 +85,14 @@ class FuturaSciencesBridge extends FeedExpander $this->collectExpandableDatas($url, 10); } - protected function parseItem($newsItem) + protected function parseItem($item) { - $item = parent::parseItem($newsItem); + $item = parent::parseItem($item); + $item['uri'] = str_replace('#xtor%3DRSS-8', '', $item['uri']); - $article = getSimpleHTMLDOMCached($item['uri']); - $item['content'] = $this->extractArticleContent($article); - $author = $this->extractAuthor($article); + $dom = getSimpleHTMLDOMCached($item['uri']); + $item['content'] = $this->extractArticleContent($dom); + $author = $this->extractAuthor($dom); if (!empty($author)) { $item['author'] = $author; } diff --git a/bridges/HardwareInfoBridge.php b/bridges/HardwareInfoBridge.php index e295984c..6a47df66 100644 --- a/bridges/HardwareInfoBridge.php +++ b/bridges/HardwareInfoBridge.php @@ -9,15 +9,15 @@ class HardwareInfoBridge extends FeedExpander public function collectData() { - $this->collectExpandableDatas('https://nl.hardware.info/updates/all.rss', 20); + $this->collectExpandableDatas('https://nl.hardware.info/updates/all.rss', 10); } - protected function parseItem($feedItem) + protected function parseItem($item) { - $item = parent::parseItem($feedItem); + $item = parent::parseItem($item); - //get full article - $articlePage = getSimpleHTMLDOMCached($feedItem->link); + $itemUrl = $item['uri']; + $articlePage = getSimpleHTMLDOMCached($itemUrl); $article = $articlePage->find('div.article__content', 0); diff --git a/bridges/HeiseBridge.php b/bridges/HeiseBridge.php index dfda311c..434e7514 100644 --- a/bridges/HeiseBridge.php +++ b/bridges/HeiseBridge.php @@ -125,9 +125,10 @@ class HeiseBridge extends FeedExpander ); } - protected function parseItem($feedItem) + protected function parseItem($item) { - $item = parent::parseItem($feedItem); + $item = parent::parseItem($item); + $sessioncookie = $this->getInput('sessioncookie'); // strip rss parameter diff --git a/bridges/IGNBridge.php b/bridges/IGNBridge.php index d00b6a18..c0260cbd 100644 --- a/bridges/IGNBridge.php +++ b/bridges/IGNBridge.php @@ -10,17 +10,16 @@ class IGNBridge extends FeedExpander public function collectData() { - $this->collectExpandableDatas('http://feeds.ign.com/ign/all', 15); + $this->collectExpandableDatas('http://feeds.ign.com/ign/all', 2); } // IGNs feed is both hidden and incomplete. This bridge tries to fix this. - protected function parseItem($newsItem) + protected function parseItem($item) { - $item = parent::parseItem($newsItem); + $item = parent::parseItem($item); - // $articlePage gets the entire page's contents - $articlePage = getSimpleHTMLDOM($newsItem->link); + $articlePage = getSimpleHTMLDOM($item['uri']); // List of BS elements $uselessElements = [ @@ -33,7 +32,7 @@ class IGNBridge extends FeedExpander '.jsx-4213937408', '.commerce-container', '.widget-container', - '.newsletter-signup-button' + '.newsletter-signup-button', ]; // Remove useless elements diff --git a/bridges/LeMondeInformatiqueBridge.php b/bridges/LeMondeInformatiqueBridge.php index 678e405f..c91a0437 100644 --- a/bridges/LeMondeInformatiqueBridge.php +++ b/bridges/LeMondeInformatiqueBridge.php @@ -12,9 +12,10 @@ class LeMondeInformatiqueBridge extends FeedExpander $this->collectExpandableDatas(self::URI . 'rss/rss.xml', 10); } - protected function parseItem($newsItem) + protected function parseItem($item) { - $item = parent::parseItem($newsItem); + $item = parent::parseItem($item); + $article_html = getSimpleHTMLDOMCached($item['uri']); //Deduce thumbnail URL from article image URL diff --git a/bridges/ListverseBridge.php b/bridges/ListverseBridge.php index ba6d7397..b7acbdd0 100644 --- a/bridges/ListverseBridge.php +++ b/bridges/ListverseBridge.php @@ -13,12 +13,11 @@ class ListverseBridge extends FeedExpander $this->collectExpandableDatas('https://listverse.com/feed/', 15); } - protected function parseItem($newsItem) + protected function parseItem($item) { - $item = parent::parseItem($newsItem); - // $articlePage gets the entire page's contents - $articlePage = getSimpleHTMLDOM($newsItem->link); - $article = $articlePage->find('#articlecontentonly', 0); + $item = parent::parseItem($item); + $dom = getSimpleHTMLDOM($item['uri']); + $article = $dom->find('#articlecontentonly', 0); $item['content'] = $article; return $item; } diff --git a/bridges/MediapartBridge.php b/bridges/MediapartBridge.php index 3c8c8317..c4deda61 100644 --- a/bridges/MediapartBridge.php +++ b/bridges/MediapartBridge.php @@ -29,9 +29,11 @@ class MediapartBridge extends FeedExpander $this->collectExpandableDatas($url); } - protected function parseItem($newsItem) + protected function parseItem($item) { - $item = parent::parseItem($newsItem); + $item = parent::parseItem($item); + + $itemUrl = $item['uri']; // Mediapart provide multiple type of contents. // We only process items relative to the newspaper @@ -49,12 +51,8 @@ class MediapartBridge extends FeedExpander $opt = []; $opt[CURLOPT_COOKIE] = 'MPSESSID=' . $mpsessid; - // Get the page - $articlePage = getSimpleHTMLDOM( - $newsItem->link . '?onglet=full', - [], - $opt - ); + $pageUrl = $itemUrl . '?onglet=full'; + $articlePage = getSimpleHTMLDOM($pageUrl, [], $opt); // Extract the article content $content = $articlePage->find('div.content-article', 0)->innertext; diff --git a/bridges/MsnMondeBridge.php b/bridges/MsnMondeBridge.php index 844aa4a2..9b308b99 100644 --- a/bridges/MsnMondeBridge.php +++ b/bridges/MsnMondeBridge.php @@ -22,17 +22,19 @@ class MsnMondeBridge extends FeedExpander public function collectData() { - $this->collectExpandableDatas(self::FEED_URL, self::LIMIT); + $this->collectExpandableDatas(self::FEED_URL, 10); } - protected function parseItem($newsItem) + protected function parseItem($item) { - $item = parent::parseItem($newsItem); + $item = parent::parseItem($item); + if (!preg_match('#fr-fr/actualite.*/ar-(?[\w]*)\?#', $item['uri'], $matches)) { - return; + return null; } - $json = json_decode(getContents(self::JSON_URL . $matches['id']), true); + $jsonString = getContents(self::JSON_URL . $matches['id']); + $json = json_decode($jsonString, true); $item['content'] = $json['body']; if (!empty($json['authors'])) { $item['author'] = reset($json['authors'])['name']; diff --git a/bridges/NYTBridge.php b/bridges/NYTBridge.php index 46ede3f8..57c3e2af 100644 --- a/bridges/NYTBridge.php +++ b/bridges/NYTBridge.php @@ -10,17 +10,18 @@ class NYTBridge extends FeedExpander public function collectData() { - $this->collectExpandableDatas('https://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml', 40); + $url = 'https://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml'; + $this->collectExpandableDatas($url, 40); } - protected function parseItem($newsItem) + protected function parseItem($item) { - $item = parent::parseItem($newsItem); + $item = parent::parseItem($item); + $article = ''; - // $articlePage gets the entire page's contents try { - $articlePage = getSimpleHTMLDOM($newsItem->link); + $articlePage = getSimpleHTMLDOM($item['uri']); } catch (HttpException $e) { // 403 Forbidden, This means we got anti-bot response if ($e->getCode() === 403) { diff --git a/bridges/NextInpactBridge.php b/bridges/NextInpactBridge.php index 408fd783..0260da14 100644 --- a/bridges/NextInpactBridge.php +++ b/bridges/NextInpactBridge.php @@ -88,9 +88,10 @@ class NextInpactBridge extends FeedExpander $this->collectExpandableDatas($url, $limit); } - protected function parseItem($newsItem) + protected function parseItem($item) { - $item = parent::parseItem($newsItem); + $item = parent::parseItem($item); + $item['content'] = $this->extractContent($item, $item['uri']); if (is_null($item['content'])) { return null; //Filtered article diff --git a/bridges/NextgovBridge.php b/bridges/NextgovBridge.php index ad17f88e..1e096d6c 100644 --- a/bridges/NextgovBridge.php +++ b/bridges/NextgovBridge.php @@ -26,7 +26,8 @@ class NextgovBridge extends FeedExpander public function collectData() { - $this->collectExpandableDatas(self::URI . 'rss/' . $this->getInput('category') . '/', 10); + $url = self::URI . 'rss/' . $this->getInput('category') . '/'; + $this->collectExpandableDatas($url, 10); } protected function parseItem($newsItem) diff --git a/bridges/NiceMatinBridge.php b/bridges/NiceMatinBridge.php index 6e622b42..bcebbbbb 100644 --- a/bridges/NiceMatinBridge.php +++ b/bridges/NiceMatinBridge.php @@ -12,9 +12,10 @@ class NiceMatinBridge extends FeedExpander $this->collectExpandableDatas(self::URI . 'derniere-minute/rss', 10); } - protected function parseItem($newsItem) + protected function parseItem($item) { - $item = parent::parseItem($newsItem); + $item = parent::parseItem($item); + $item['content'] = $this->extractContent($item['uri']); return $item; } diff --git a/bridges/NyaaTorrentsBridge.php b/bridges/NyaaTorrentsBridge.php index 217db377..3b5ad3ad 100644 --- a/bridges/NyaaTorrentsBridge.php +++ b/bridges/NyaaTorrentsBridge.php @@ -65,12 +65,14 @@ class NyaaTorrentsBridge extends FeedExpander $this->collectExpandableDatas($this->getURI(), 20); } - protected function parseItem($newItem) + protected function parseItem($newsItem) { - $item = parent::parseItem($newItem); + $item = parent::parseItem($newsItem); + + $nyaaFields = (array)($newsItem->children('nyaa', true)); + $item['id'] = str_replace(['https://nyaa.si/download/', '.torrent'], '', $item['uri']); - $nyaaFields = (array)($newItem->children('nyaa', true)); $item = array_merge($item, $nyaaFields); // Convert URI from torrent file to web page diff --git a/bridges/OnVaSortirBridge.php b/bridges/OnVaSortirBridge.php index af80dd31..9f9a750c 100644 --- a/bridges/OnVaSortirBridge.php +++ b/bridges/OnVaSortirBridge.php @@ -117,18 +117,18 @@ class OnVaSortirBridge extends FeedExpander ] ]; + public function collectData() + { + $url = 'https://' . $this->getInput('city') . '.onvasortir.com/rss.php'; + $this->collectExpandableDatas($url); + } + protected function parseItem($item) { $item = parent::parseItem($item); - $html = getSimpleHTMLDOMCached($item['uri']); - $text = $html->find('div.corpsMax', 0)->innertext; + $dom = getSimpleHTMLDOMCached($item['uri']); + $text = $dom->find('div.corpsMax', 0)->innertext; $item['content'] = utf8_encode($text); return $item; } - - public function collectData() - { - $this->collectExpandableDatas('https://' . - $this->getInput('city') . '.onvasortir.com/rss.php'); - } } diff --git a/bridges/PhoronixBridge.php b/bridges/PhoronixBridge.php index 620fda66..fc0d78e5 100644 --- a/bridges/PhoronixBridge.php +++ b/bridges/PhoronixBridge.php @@ -29,22 +29,25 @@ but some RSS readers don\'t support this. "img" tag are supported by most browse $this->collectExpandableDatas('https://www.phoronix.com/rss.php', $this->getInput('n')); } - protected function parseItem($newsItem) + protected function parseItem($item) { - $item = parent::parseItem($newsItem); - // $articlePage gets the entire page's contents - $articlePage = getSimpleHTMLDOM($newsItem->link); + $item = parent::parseItem($item); + + $itemUrl = $item['uri']; + + $articlePage = getSimpleHTMLDOM($itemUrl); $articlePage = defaultLinkTo($articlePage, $this->getURI()); // Extract final link. From Facebook's like plugin. - parse_str(parse_url($articlePage->find('iframe[src^=//www.facebook.com/plugins]', 0), PHP_URL_QUERY), $facebookQuery); + $parsedUrlQuery = parse_url($articlePage->find('iframe[src^=//www.facebook.com/plugins]', 0), PHP_URL_QUERY); + parse_str($parsedUrlQuery, $facebookQuery); if (array_key_exists('href', $facebookQuery)) { - $newsItem->link = $facebookQuery['href']; + $itemUrl = $facebookQuery['href']; } $item['content'] = $this->extractContent($articlePage); $pages = $articlePage->find('.pagination a[!title]'); foreach ($pages as $page) { - $pageURI = urljoin($newsItem->link, html_entity_decode($page->href)); + $pageURI = urljoin($itemUrl, html_entity_decode($page->href)); $page = getSimpleHTMLDOM($pageURI); $item['content'] .= $this->extractContent($page); } diff --git a/bridges/QwantzBridge.php b/bridges/QwantzBridge.php index e48e948a..2117c33c 100644 --- a/bridges/QwantzBridge.php +++ b/bridges/QwantzBridge.php @@ -6,9 +6,15 @@ class QwantzBridge extends FeedExpander const URI = 'https://qwantz.com/'; const DESCRIPTION = 'Latest comic.'; - protected function parseItem($feedItem) + public function collectData() { - $item = parent::parseItem($feedItem); + $this->collectExpandableDatas(self::URI . 'rssfeed.php'); + } + + protected function parseItem($item) + { + $item = parent::parseItem($item); + $item['author'] = 'Ryan North'; preg_match('/title="(.*?)"/', $item['content'], $matches); @@ -25,11 +31,6 @@ class QwantzBridge extends FeedExpander return $item; } - public function collectData() - { - $this->collectExpandableDatas(self::URI . 'rssfeed.php'); - } - public function getIcon() { return self::URI . 'favicon.ico'; diff --git a/bridges/RaceDepartmentBridge.php b/bridges/RaceDepartmentBridge.php index 7fe92b4a..3783b53e 100644 --- a/bridges/RaceDepartmentBridge.php +++ b/bridges/RaceDepartmentBridge.php @@ -12,12 +12,11 @@ class RaceDepartmentBridge extends FeedExpander $this->collectExpandableDatas('https://www.racedepartment.com/ams/index.rss', 10); } - protected function parseItem($feedItem) + protected function parseItem($item) { - $item = parent::parseItem($feedItem); + $item = parent::parseItem($item); - //fetch page - $articlePage = getSimpleHTMLDOMCached($feedItem->link); + $articlePage = getSimpleHTMLDOMCached($item['uri']); $coverImage = $articlePage->find('img.js-articleCoverImage', 0); #relative url -> absolute url diff --git a/bridges/ScribbleHubBridge.php b/bridges/ScribbleHubBridge.php index 4fe43df3..8f52d461 100644 --- a/bridges/ScribbleHubBridge.php +++ b/bridges/ScribbleHubBridge.php @@ -42,9 +42,9 @@ class ScribbleHubBridge extends FeedExpander $this->collectExpandableDatas($url); } - protected function parseItem($newItem) + protected function parseItem($item) { - $item = parent::parseItem($newItem); + $item = parent::parseItem($item); //For series, filter out other series from 'All' feed if ( @@ -57,7 +57,7 @@ class ScribbleHubBridge extends FeedExpander $item['comments'] = $item['uri'] . '#comments'; try { - $item_html = getSimpleHTMLDOMCached($item['uri']); + $dom = getSimpleHTMLDOMCached($item['uri']); } catch (HttpException $e) { // 403 Forbidden, This means we got anti-bot response if ($e->getCode() === 403) { @@ -66,22 +66,22 @@ class ScribbleHubBridge extends FeedExpander throw $e; } - $item_html = defaultLinkTo($item_html, self::URI); + $dom = defaultLinkTo($dom, self::URI); //Retrieve full description from page contents - $item['content'] = $item_html->find('#chp_raw', 0); + $item['content'] = $dom->find('#chp_raw', 0); //Retrieve image for thumbnail - $item_image = $item_html->find('.s_novel_img > img', 0)->src; + $item_image = $dom->find('.s_novel_img > img', 0)->src; $item['enclosures'] = [$item_image]; //Restore lost categories - $item_story = html_entity_decode($item_html->find('.chp_byauthor > a', 0)->innertext); - $item_sid = $item_html->find('#mysid', 0)->value; + $item_story = html_entity_decode($dom->find('.chp_byauthor > a', 0)->innertext); + $item_sid = $dom->find('#mysid', 0)->value; $item['categories'] = [$item_story, $item_sid]; //Generate UID - $item_pid = $item_html->find('#mypostid', 0)->value; + $item_pid = $dom->find('#mypostid', 0)->value; $item['uid'] = $item_sid . "/$item_pid"; return $item; diff --git a/bridges/SplCenterBridge.php b/bridges/SplCenterBridge.php index a590558d..ca764846 100644 --- a/bridges/SplCenterBridge.php +++ b/bridges/SplCenterBridge.php @@ -21,6 +21,12 @@ class SplCenterBridge extends FeedExpander const CACHE_TIMEOUT = 3600; // 1 hour + public function collectData() + { + $url = $this->getURI() . '/rss.xml'; + $this->collectExpandableDatas($url); + } + protected function parseItem($item) { $item = parent::parseItem($item); @@ -37,11 +43,6 @@ class SplCenterBridge extends FeedExpander return $item; } - public function collectData() - { - $this->collectExpandableDatas($this->getURI() . '/rss.xml'); - } - public function getURI() { if (!is_null($this->getInput('content'))) { diff --git a/bridges/TapasBridge.php b/bridges/TapasBridge.php index e512ad48..11a9551d 100644 --- a/bridges/TapasBridge.php +++ b/bridges/TapasBridge.php @@ -30,14 +30,17 @@ class TapasBridge extends FeedExpander protected $id; - public function getURI() + public function collectData() { - if ($this->id) { - return self::URI . 'rss/series/' . $this->id; - } else { - return self::URI . 'series/' . $this->getInput('title') . '/info/'; + if (preg_match('/^[\d]+$/', $this->getInput('title'))) { + $this->id = $this->getInput('title'); } - return self::URI; + if ($this->getInput('force_title') or !$this->id) { + $html = getSimpleHTMLDOM($this->getURI()) or returnServerError('Could not request ' . $this->getURI()); + $this->id = $html->find('meta[property$=":url"]', 0)->content; + $this->id = str_ireplace(['tapastic://series/', '/info'], '', $this->id); + } + $this->collectExpandableDatas($this->getURI()); } protected function parseItem($feedItem) @@ -72,16 +75,13 @@ class TapasBridge extends FeedExpander return $item; } - public function collectData() + public function getURI() { - if (preg_match('/^[\d]+$/', $this->getInput('title'))) { - $this->id = $this->getInput('title'); + if ($this->id) { + return self::URI . 'rss/series/' . $this->id; + } else { + return self::URI . 'series/' . $this->getInput('title') . '/info/'; } - if ($this->getInput('force_title') or !$this->id) { - $html = getSimpleHTMLDOM($this->getURI()) or returnServerError('Could not request ' . $this->getURI()); - $this->id = $html->find('meta[property$=":url"]', 0)->content; - $this->id = str_ireplace(['tapastic://series/', '/info'], '', $this->id); - } - $this->collectExpandableDatas($this->getURI()); + return self::URI; } } diff --git a/bridges/TheGuardianBridge.php b/bridges/TheGuardianBridge.php index e05bde75..98e56506 100644 --- a/bridges/TheGuardianBridge.php +++ b/bridges/TheGuardianBridge.php @@ -52,18 +52,15 @@ class TheGuardianBridge extends FeedExpander public function collectData() { $feed = $this->getInput('feed'); - $feedURL = 'https://feeds.theguardian.com/theguardian/' . $feed; - $this->collectExpandableDatas($feedURL, 10); + $url = 'https://feeds.theguardian.com/theguardian/' . $feed; + $this->collectExpandableDatas($url, 10); } - protected function parseItem($newsItem) + protected function parseItem($item) { - $item = parent::parseItem($newsItem); + $item = parent::parseItem($item); - // --- Recovering the article --- - - // $articlePage gets the entire page's contents - $articlePage = getSimpleHTMLDOM($newsItem->link); + $articlePage = getSimpleHTMLDOM($item['uri']); // figure contain's the main article image $article = $articlePage->find('figure', 0); // content__article-body has the actual article diff --git a/bridges/TwitterEngineeringBridge.php b/bridges/TwitterEngineeringBridge.php index fa55b4a4..b98cfb87 100644 --- a/bridges/TwitterEngineeringBridge.php +++ b/bridges/TwitterEngineeringBridge.php @@ -8,18 +8,24 @@ class TwitterEngineeringBridge extends FeedExpander const DESCRIPTION = 'Returns the newest articles.'; const CACHE_TIMEOUT = 21600; // 6h + public function collectData() + { + $url = 'https://blog.twitter.com/engineering/en_us/blog.rss'; + $this->collectExpandableDatas($url); + } + protected function parseItem($item) { $item = parent::parseItem($item); - $article_html = getSimpleHTMLDOMCached($item['uri']); - if (!$article_html) { + $dom = getSimpleHTMLDOMCached($item['uri']); + if (!$dom) { $item['content'] .= '

Could not request ' . $this->getName() . ': ' . $item['uri'] . '

'; return $item; } - $article_html = defaultLinkTo($article_html, $this->getURI()); + $dom = defaultLinkTo($dom, $this->getURI()); - $article_body = $article_html->find('div.column.column-6', 0); + $article_body = $dom->find('div.column.column-6', 0); // Remove elements that are not part of article content $unwanted_selector = 'div.bl02-blog-post-text-masthead, div.tweet-error-text, div.bl13-tweet-template'; @@ -33,8 +39,8 @@ class TwitterEngineeringBridge extends FeedExpander } $item['content'] = $article_body; - $item['timestamp'] = strtotime($article_html->find('span.b02-blog-post-no-masthead__date', 0)->innertext); - $item['categories'] = self::getCategoriesFromTags($article_html); + $item['timestamp'] = strtotime($dom->find('span.b02-blog-post-no-masthead__date', 0)->innertext); + $item['categories'] = self::getCategoriesFromTags($dom); return $item; } @@ -53,12 +59,6 @@ class TwitterEngineeringBridge extends FeedExpander return $categories; } - public function collectData() - { - $feed = static::URI . 'en_us/blog.rss'; - $this->collectExpandableDatas($feed); - } - public function getName() { // Else the original feed returns "English (US)" as the title diff --git a/bridges/VarietyBridge.php b/bridges/VarietyBridge.php index 23d1df3f..6625dca2 100644 --- a/bridges/VarietyBridge.php +++ b/bridges/VarietyBridge.php @@ -13,11 +13,11 @@ class VarietyBridge extends FeedExpander $this->collectExpandableDatas('https://feeds.feedburner.com/variety/headlines', 15); } - protected function parseItem($newsItem) + protected function parseItem($item) { - $item = parent::parseItem($newsItem); + $item = parent::parseItem($item); // $articlePage gets the entire page's contents - $articlePage = getSimpleHTMLDOM($newsItem->link); + $articlePage = getSimpleHTMLDOM($item['uri']); // Remove Script tags foreach ($articlePage->find('script') as $script_tag) { diff --git a/bridges/ViceBridge.php b/bridges/ViceBridge.php index 35414020..c7ecec33 100644 --- a/bridges/ViceBridge.php +++ b/bridges/ViceBridge.php @@ -32,14 +32,14 @@ class ViceBridge extends FeedExpander $this->collectExpandableDatas($feedURL, 10); } - protected function parseItem($newsItem) + protected function parseItem($item) { - $item = parent::parseItem($newsItem); - // $articlePage gets the entire page's contents - $articlePage = getSimpleHTMLDOM($newsItem->link); + $item = parent::parseItem($item); + + $articlePage = getSimpleHTMLDOM($item['uri']); // text and embedded content $article = $articlePage->find('.article__body', 0); - $item['content'] = $article; + $item['content'] = $article ?? ''; return $item; } diff --git a/bridges/WiredBridge.php b/bridges/WiredBridge.php index d4c7cbbb..7f7f6051 100644 --- a/bridges/WiredBridge.php +++ b/bridges/WiredBridge.php @@ -50,13 +50,16 @@ class WiredBridge extends FeedExpander $this->collectExpandableDatas($feed_url, $limit); } - protected function parseItem($newsItem) + protected function parseItem($item) { - $item = parent::parseItem($newsItem); + $item = parent::parseItem($item); + + $originalContent = $item['content']; + $article = getSimpleHTMLDOMCached($item['uri']); $item['content'] = $this->extractArticleContent($article); - $headline = strval($newsItem->description); + $headline = $originalContent; if (!empty($headline)) { $item['content'] = '

' . $headline . '

' . $item['content']; } diff --git a/bridges/WordPressBridge.php b/bridges/WordPressBridge.php index 1d46958d..fb27eb31 100644 --- a/bridges/WordPressBridge.php +++ b/bridges/WordPressBridge.php @@ -20,50 +20,56 @@ class WordPressBridge extends FeedExpander ], ]]; - private function cleanContent($content) + public function collectData() { - $content = stripWithDelimiters($content, ''); - $content = preg_replace('/
/', '', $content); - return $content; + $limit = $this->getInput('limit') ?? 10; + if ($this->getInput('url') && substr($this->getInput('url'), 0, strlen('http')) !== 'http') { + // just in case someone find a way to access local files by playing with the url + returnClientError('The url parameter must either refer to http or https protocol.'); + } + try { + $this->collectExpandableDatas($this->getURI() . '/feed/atom/', $limit); + } catch (Exception $e) { + $this->collectExpandableDatas($this->getURI() . '/?feed=atom', $limit); + } } - protected function parseItem($newItem) + protected function parseItem($item) { - $item = parent::parseItem($newItem); + $item = parent::parseItem($item); - $article_html = getSimpleHTMLDOMCached($item['uri']); + $dom = getSimpleHTMLDOMCached($item['uri']); // Find article body $article = null; switch (true) { case !empty($this->getInput('content-selector')): // custom contect selector (manually specified by user) - $article = $article_html->find($this->getInput('content-selector'), 0); + $article = $dom->find($this->getInput('content-selector'), 0); break; - case !is_null($article_html->find('[itemprop=articleBody]', 0)): + case !is_null($dom->find('[itemprop=articleBody]', 0)): // highest priority content div (used for SEO) - $article = $article_html->find('[itemprop=articleBody]', 0); + $article = $dom->find('[itemprop=articleBody]', 0); break; - case !is_null($article_html->find('.article-content', 0)): + case !is_null($dom->find('.article-content', 0)): // more precise than article when present - $article = $article_html->find('.article-content', 0); + $article = $dom->find('.article-content', 0); break; - case !is_null($article_html->find('article', 0)): + case !is_null($dom->find('article', 0)): // most common content div - $article = $article_html->find('article', 0); + $article = $dom->find('article', 0); break; - case !is_null($article_html->find('.single-content', 0)): + case !is_null($dom->find('.single-content', 0)): // another common content div - $article = $article_html->find('.single-content', 0); + $article = $dom->find('.single-content', 0); break; - case !is_null($article_html->find('.post-content', 0)): + case !is_null($dom->find('.post-content', 0)): // another common content div - $article = $article_html->find('.post-content', 0); + $article = $dom->find('.post-content', 0); break; - case !is_null($article_html->find('.post', 0)): + case !is_null($dom->find('.post', 0)): // for old WordPress themes without HTML5 - $article = $article_html->find('.post', 0); + $article = $dom->find('.post', 0); break; } @@ -76,7 +82,7 @@ class WordPressBridge extends FeedExpander // Find article main image $article = convertLazyLoading($article); - $article_image = $article_html->find('img.wp-post-image', 0); + $article_image = $dom->find('img.wp-post-image', 0); if (!empty($item['content']) && (!is_object($article_image) || empty($article_image->src))) { $article_image = str_get_html($item['content'])->find('img.wp-post-image', 0); } @@ -106,6 +112,14 @@ class WordPressBridge extends FeedExpander return $item; } + private function cleanContent($content) + { + $content = stripWithDelimiters($content, ''); + $content = preg_replace('/
/', '', $content); + return $content; + } + public function getURI() { $url = $this->getInput('url'); @@ -114,18 +128,4 @@ class WordPressBridge extends FeedExpander } return $url; } - - public function collectData() - { - $limit = $this->getInput('limit') ?? 10; - if ($this->getInput('url') && substr($this->getInput('url'), 0, strlen('http')) !== 'http') { - // just in case someone find a way to access local files by playing with the url - returnClientError('The url parameter must either refer to http or https protocol.'); - } - try { - $this->collectExpandableDatas($this->getURI() . '/feed/atom/', $limit); - } catch (Exception $e) { - $this->collectExpandableDatas($this->getURI() . '/?feed=atom', $limit); - } - } } diff --git a/bridges/WorldOfTanksBridge.php b/bridges/WorldOfTanksBridge.php index 6e7a594b..52691025 100644 --- a/bridges/WorldOfTanksBridge.php +++ b/bridges/WorldOfTanksBridge.php @@ -30,9 +30,9 @@ class WorldOfTanksBridge extends FeedExpander $this->collectExpandableDatas(sprintf('https://worldoftanks.eu/%s/rss/news/', $this->getInput('lang'))); } - protected function parseItem($newsItem) + protected function parseItem($item) { - $item = parent::parseItem($newsItem); + $item = parent::parseItem($item); $item['content'] = $this->loadFullArticle($item['uri']); return $item; } diff --git a/bridges/ZeitBridge.php b/bridges/ZeitBridge.php index b294e9fb..7d7e89aa 100644 --- a/bridges/ZeitBridge.php +++ b/bridges/ZeitBridge.php @@ -50,19 +50,19 @@ class ZeitBridge extends FeedExpander 'defaultValue' => 5 ] ]]; - const LIMIT = 5; public function collectData() { - $this->collectExpandableDatas( - $this->getInput('category'), - $this->getInput('limit') ?: static::LIMIT - ); + $url = $this->getInput('category'); + $limit = $this->getInput('limit') ?: 5; + + $this->collectExpandableDatas($url, $limit); } protected function parseItem($item) { $item = parent::parseItem($item); + $item['enclosures'] = []; $headers = [