From 24f3bb84d8f8a4638161c7056cc2e8cbea73584c Mon Sep 17 00:00:00 2001 From: ORelio <oreliogitantispam.l0gin@spamgourmet.com> Date: Thu, 22 Oct 2015 14:51:56 +0200 Subject: [PATCH] Add CNET News bridge --- bridges/CNETBridge.php | 99 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 bridges/CNETBridge.php diff --git a/bridges/CNETBridge.php b/bridges/CNETBridge.php new file mode 100644 index 00000000..dfa21d4a --- /dev/null +++ b/bridges/CNETBridge.php @@ -0,0 +1,99 @@ +<?php +/** +* CNET Bridge +* Returns the newest articles +* 2015-09-08 +* +* @name CNET News +* @homepage http://www.cnet.com/ +* @description Returns the newest articles. <br /> You may specify a topic, else all topics are selected. +* @maintainer ORelio +* @update 2015-09-10 +* @use1(topic="Topic name") +*/ +class CNETBridge extends BridgeAbstract { + + private $topicName = ''; + + public function collectData(array $param) { + + function ExtractFromDelimiters($string, $start, $end) { + if (strpos($string, $start) !== false) { + $section_retrieved = substr($string, strpos($string, $start) + strlen($start)); + $section_retrieved = substr($section_retrieved, 0, strpos($section_retrieved, $end)); + return $section_retrieved; + } return false; + } + + function StripWithDelimiters($string, $start, $end) { + while (strpos($string, $start) !== false) { + $section_to_remove = substr($string, strpos($string, $start)); + $section_to_remove = substr($section_to_remove, 0, strpos($section_to_remove, $end) + strlen($end)); + $string = str_replace($section_to_remove, '', $string); + } return $string; + } + + function CleanArticle($article_html) { + $article_html = '<p>'.substr($article_html, strpos($article_html, '</script></div><p>') + 18); + $article_html = StripWithDelimiters($article_html, '<script>', '</script>'); + $article_html = StripWithDelimiters($article_html, '<div class="shortcode related-links', '</div>'); + $article_html = StripWithDelimiters($article_html, '<a class="clickToEnlarge">', '</a>'); + return $article_html; + } + + if (!empty($param['topic'])) + $this->topicName = $param['topic']; + + $pageUrl = 'http://www.cnet.com/'.(empty($this->topicName) ? '' : 'topics/'.$this->topicName.'/'); + $html = file_get_html($pageUrl) or $this->returnError('Could not request CNET: '.$pageUrl, 404); + $limit = 0; + + foreach($html->find('div.socialSharingSmall') as $element) { + if ($limit < 4) { + + $article_meta = json_decode(ExtractFromDelimiters($element->outertext, 'data-social-counts-options=\'', '\'>')); + $article_title = $article_meta->title; + $article_uri = $article_meta->url; + + if (!empty($article_title) && !empty($article_uri) && strpos($article_uri, '/news/') !== false) { + + $article_html = file_get_html($article_uri) or $this->returnError('Could not request CNET: '.$article_uri, 404); + $article_timestamp = strtotime(ExtractFromDelimiters($article_html->innertext, '<time itemprop="datePublished" class="', '">')); + $article_thumbnail = $article_html->find('div.originalImage', 0); + + if (is_null($article_thumbnail)) + $article_thumbnail = $article_html->find('span.imageContainer', 0); + + if (!is_null($article_thumbnail)) + $article_thumbnail = $article_thumbnail->find('img', 0)->src; + + $article_content = trim(CleanArticle(ExtractFromDelimiters($article_html, '</div></div></div><div class="col-8">', '<footer>'))); + $article_author = trim($article_html->find('a.author', 0)->plaintext); + + $item = new \Item(); + $item->uri = $article_uri; + $item->thumbnailUri = $article_thumbnail; + $item->title = $article_title; + $item->author = $article_author; + $item->timestamp = $article_timestamp; + $item->content = $article_content; + $this->items[] = $item; + $limit++; + } + } + } + } + + public function getName() { + return 'CNET News Bridge'.(empty($this->topicName) ? '' : ' - '.$this->topicName); + } + + public function getURI() { + return 'http://www.cnet.com/'; + } + + public function getCacheDuration() { + return 1800; // 30 minutes + // return 0; + } +}