diff --git a/bridges/GatesNotesBridge.php b/bridges/GatesNotesBridge.php index 8c988fcb..c7b7c37f 100644 --- a/bridges/GatesNotesBridge.php +++ b/bridges/GatesNotesBridge.php @@ -1,6 +1,6 @@ 'all', + 'setNumber' => '0', + 'sortByVideo' => 'all', + 'sortByTopic' => 'all' + ]; + $api_endpoint = '/api/TGNWebAPI/Get_Filtered_Article_Set?'; + $apiUrl = self::URI . $api_endpoint . http_build_query($params); - $article_html = getSimpleHTMLDOMCached($item['uri']); + $rawContent = getContents($apiUrl); + $cleanedContent = str_replace('\r\n', '', substr($rawContent, 1, -1)); + $cleanedContent = str_replace('\"', '"', $cleanedContent); + + // The content is actually a json between quotes with \r\n inserted + $json = json_decode($cleanedContent); + + foreach ($json as $article) { + $item = []; + + $articleUri = self::URI . '/' . $article->{'_system_'}->name; + + $item['uri'] = $articleUri; + $item['title'] = $article->headline; + $item['content'] = self::getItemContent($articleUri); + $item['timestamp'] = strtotime($article->date); + + $this->items[] = $item; + } + } + + protected function getItemContent($articleUri) + { + // We need to change the headers as the normal desktop website + // use canvas-based image carousels for some pictures + $headers = [ + 'User-Agent: Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)', + ]; + $article_html = getSimpleHTMLDOMCached($articleUri, 86400, $headers); + + $content = ''; if (!$article_html) { - $item['content'] .= '
Could not request ' . $this->getName() . ': ' . $item['uri'] . '
'; - return $item; + $content .= 'Could not request ' . $this->getName() . ': ' . $articleUri . '
'; + return $content; } $article_html = defaultLinkTo($article_html, $this->getURI()); @@ -23,6 +60,20 @@ class GatesNotesBridge extends FeedExpander $hero_image = 'getAttribute('data-src') . '>'; $article_body = $article_html->find('div.TGN_Article_ReadTimeSection', 0); + + // Remove the menu bar on some articles (PDF download etc.) + foreach ($article_body->find('.TGN_MenuHolder') as $found) { + $found->remove(); + } + + // For the carousels pictures, we still to remove the lazy-loading and force the real picture + foreach ($article_body->find('canvas') as $found) { + $found->remove(); + } + foreach ($article_body->find('.TGN_PE_C_Img') as $found) { + $found->setAttribute('src', $found->getAttribute('data-src')); + } + // Convert iframe of Youtube videos to link foreach ($article_body->find('iframe') as $found) { $iframeUrl = $found->getAttribute('src'); @@ -32,6 +83,7 @@ class GatesNotesBridge extends FeedExpander $found->outertext = ''; } } + // Remove CSS ressources foreach ($article_body->find('link') as $found) { $linkedRessourceUrl = $found->getAttribute('href'); @@ -42,14 +94,8 @@ class GatesNotesBridge extends FeedExpander } $article_body = sanitize($article_body->innertext); - $item['content'] = $top_description . $hero_image . $article_body; + $content = $top_description . $hero_image . $article_body; - return $item; - } - - public function collectData() - { - $feed = static::URI . '/rss'; - $this->collectExpandableDatas($feed); + return $content; } }