1
0
mirror of https://github.com/RSS-Bridge/rss-bridge.git synced 2025-01-17 22:28:22 +01:00
php-rss-bridge/bridges/GBAtempBridge.php
ORelio c1c998dd13
[GBAtempBridge] Fix content extraction (#2314)
Bridge was broken since GBAtemp's Xenforo 2 upgrade on 2021-09-23
2022-01-29 10:29:01 +05:00

149 lines
5.0 KiB
PHP

<?php
/**
 * Bridge for GBAtemp (https://gbatemp.net/).
 *
 * Scrapes the GBAtemp front page and, depending on the selected "type"
 * parameter, builds feed items for news, reviews, tutorials or forum posts.
 * The body of every item is fetched from the item's own page.
 */
class GBAtempBridge extends BridgeAbstract {
    const MAINTAINER = 'ORelio';
    const NAME = 'GBAtemp';
    const URI = 'https://gbatemp.net/';
    const DESCRIPTION = 'GBAtemp is a user friendly underground video game community.';
    const PARAMETERS = array( array(
        'type' => array(
            'name' => 'Type',
            'type' => 'list',
            'values' => array(
                'News' => 'N',
                'Reviews' => 'R',
                'Tutorials' => 'T',
                'Forum' => 'F'
            )
        )
    ));

    /**
     * Assemble a feed item array from its individual fields.
     *
     * @param string $uri Link to the item
     * @param string $title Item title
     * @param string $author Author name
     * @param int $timestamp Publication time as a Unix timestamp
     * @param string|null $thumbnail Image URL; no enclosure is added when empty
     * @param string $content HTML body of the item
     * @return array Feed item
     */
    private function buildItem($uri, $title, $author, $timestamp, $thumbnail, $content){
        $item = array();
        $item['uri'] = $uri;
        $item['title'] = $title;
        $item['author'] = $author;
        $item['timestamp'] = $timestamp;
        $item['content'] = $content;
        if (!empty($thumbnail)) {
            $item['enclosures'] = array($thumbnail);
        }
        return $item;
    }

    /**
     * Decode named and numeric HTML entities and trim surrounding whitespace.
     *
     * @param string $text Text possibly containing HTML entities
     * @return string Decoded, trimmed text
     */
    private function decodeHtmlEntities($text) {
        $text = html_entity_decode($text);
        // convmap layout is (start, end, offset, mask): second pass over
        // numeric entities in the 0x0-0x2FFFF code point range.
        $convmap = array(0x0, 0x2FFFF, 0, 0xFFFF);
        return trim(mb_decode_numericentity($text, $convmap, 'UTF-8'));
    }

    /**
     * Clean up the HTML of a post: resolve links against the site URL, strip
     * scripts, inline SVG and the reactions bar, then decode HTML entities.
     *
     * @param string $content Raw HTML of the post
     * @param string $site_url Base URL used to resolve links
     * @return string Cleaned HTML
     */
    private function cleanupPostContent($content, $site_url){
        // Honour the caller-supplied base URL (every caller passes self::URI).
        $content = defaultLinkTo($content, $site_url);
        $content = stripWithDelimiters($content, '<script', '</script>');
        $content = stripWithDelimiters($content, '<svg', '</svg>');
        $content = stripRecursiveHTMLSection($content, 'div', '<div class="reactionsBar');
        return $this->decodeHtmlEntities($content);
    }

    /**
     * Read the publication date from the first <time> element of a node.
     *
     * @param object $item DOM node to search in
     * @return int Unix timestamp, or 0 when no usable date is found
     */
    private function findItemDate($item){
        $dateField = $item->find('time', 0);
        if (!is_object($dateField)) {
            return 0;
        }
        $parsed = strtotime((string)$dateField->datetime);
        // strtotime() yields false on unparsable input; keep the 0 fallback.
        return $parsed === false ? 0 : $parsed;
    }

    /**
     * Extract the image URL from the inline "url(...)" style of an element.
     *
     * @param object $item DOM node to search in
     * @param string $selector Selector of the element carrying the style
     * @return string|null Absolute image URL, or null when none can be found
     */
    private function findItemImage($item, $selector){
        $element = $item->find($selector, 0);
        if (!is_object($element)) {
            return null;
        }
        $img = extractFromDelimiters((string)$element->style, 'url(', ')');
        if (!is_string($img)) {
            return null;
        }
        // CSS allows url('...') and url("..."): drop optional quotes.
        $img = trim($img, " \t\"'");
        // Drop the query string, if any.
        $paramPos = strpos($img, '?');
        if ($paramPos !== false) {
            $img = substr($img, 0, $paramPos);
        }
        if ($img === '') {
            return null;
        }
        if (!str_ends_with($img, '.png') && !str_ends_with($img, '.jpg')) {
            // NOTE(review): the fragment presumably lets consumers recognise
            // the enclosure as an image despite the missing extension - confirm.
            $img = $img . '#.image';
        }
        return urljoin(self::URI, $img);
    }

    /**
     * Return the plain text of the matching element, or '' when missing.
     *
     * @param object $node DOM node to search in
     * @param string $selector Selector of the wanted element
     * @param int $index Zero-based index among the matching elements
     * @return string Plain text of the element
     */
    private function extractText($node, $selector, $index = 0){
        $element = $node->find($selector, $index);
        return is_object($element) ? $element->plaintext : '';
    }

    /**
     * Return the absolute link target of the matching element.
     *
     * @param object $node DOM node to search in
     * @param string $selector Selector of the wanted anchor
     * @param int $index Zero-based index among the matching elements
     * @return string|null Absolute URL, or null when no link is available
     */
    private function extractLink($node, $selector, $index = 0){
        $element = $node->find($selector, $index);
        if (!is_object($element) || empty($element->href)) {
            return null;
        }
        return urljoin(self::URI, $element->href);
    }

    /**
     * Fetch a post page (cached) and return its cleaned-up message body.
     *
     * @param string $uri URL of the post
     * @param string $site_url Base URL used to resolve links in the content
     * @return string Cleaned HTML, or an error message when unavailable
     */
    private function fetchPostContent($uri, $site_url){
        $html = getSimpleHTMLDOMCached($uri);
        if(!$html) {
            return 'Could not request GBAtemp: ' . $uri;
        }
        $body = $html->find('article.message-body', 0);
        if (!is_object($body)) {
            return 'Could not extract GBAtemp post content: ' . $uri;
        }
        return $this->cleanupPostContent($body->innertext, $site_url);
    }

    /**
     * Collect front page entries whose content lives in a linked post
     * (news, tutorials and forum posts share this layout).
     *
     * @param object $html Front page DOM
     * @param string $itemSelector Selector of the list entries
     * @param int $linkIndex Index of the anchor holding the post link
     * @param string $titleSelector Selector of the element holding the title
     * @param int $titleIndex Index of the title element
     * @param string|null $imageSelector Selector of the thumbnail element, if any
     */
    private function collectPortalPosts(
        $html,
        $itemSelector,
        $linkIndex,
        $titleSelector,
        $titleIndex,
        $imageSelector = null
    ){
        foreach($html->find($itemSelector) as $entry) {
            $url = $this->extractLink($entry, 'a', $linkIndex);
            if ($url === null) {
                continue; // Nothing to link to: skip the malformed entry
            }
            $img = null;
            if ($imageSelector !== null) {
                $img = $this->findItemImage($entry, $imageSelector);
            }
            $time = $this->findItemDate($entry);
            $author = $this->extractText($entry, 'a.username');
            $title = $this->decodeHtmlEntities($this->extractText($entry, $titleSelector, $titleIndex));
            $content = $this->fetchPostContent($url, self::URI);
            $this->items[] = $this->buildItem($url, $title, $author, $time, $img, $content);
        }
    }

    /**
     * Collect the reviews listed on the front page. Author, date and content
     * are read from each review's own page.
     *
     * @param object $html Front page DOM
     */
    private function collectReviews($html){
        foreach($html->find('li.portal_review') as $reviewItem) {
            $url = $this->extractLink($reviewItem, 'a.review_boxart');
            if ($url === null) {
                continue; // Nothing to link to: skip the malformed entry
            }
            $img = $this->findItemImage($reviewItem, 'a.review_boxart');
            $title = $this->decodeHtmlEntities($this->extractText($reviewItem, 'h2.review_title'));
            $author = '';
            $time = 0;
            $page = getSimpleHTMLDOMCached($url);
            if (!$page) {
                // Same failure handling as fetchPostContent()
                $content = 'Could not request GBAtemp: ' . $url;
            } else {
                $author = $this->extractText($page, 'span.author--name');
                $time = $this->findItemDate($page);
                $intro = '';
                $introField = $page->find('div#review_introduction', 0);
                if (is_object($introField)) {
                    $intro = '<p><b>' . ($introField->plaintext) . '</b></p>';
                }
                $reviewField = $page->find('div#review_main', 0);
                $review = is_object($reviewField) ? $reviewField->innertext : '';
                $content = $this->cleanupPostContent($intro . $review, self::URI);
            }
            $this->items[] = $this->buildItem($url, $title, $author, $time, $img, $content);
        }
    }

    /**
     * Fetch the GBAtemp front page and fill $this->items according to the
     * selected "type" input.
     *
     * @throws \Exception When the front page cannot be loaded
     */
    public function collectData(){
        $html = getSimpleHTMLDOM(self::URI);
        if (!$html) {
            throw new \Exception('Could not request GBAtemp: ' . self::URI);
        }
        switch($this->getInput('type')) {
            case 'N':
                $this->collectPortalPosts($html, 'li.news_item.full', 0, 'h3.news_title', 0, 'a.news_image');
                break;
            case 'R':
                $this->collectReviews($html);
                break;
            case 'T':
                // The second anchor of the entry holds both the link and the title
                $this->collectPortalPosts($html, 'li.portal-tutorial', 1, 'a', 1);
                break;
            case 'F':
                $this->collectPortalPosts($html, 'li.rc_item', 1, 'a', 1);
                break;
        }
    }

    /**
     * Feed name including the label of the selected type, e.g.
     * "GBAtemp News Bridge". Falls back to the parent implementation when
     * no type is selected or the type is unknown.
     *
     * @return string
     */
    public function getName() {
        if(!is_null($this->getInput('type'))) {
            $type = array_search(
                $this->getInput('type'),
                self::PARAMETERS[$this->queriedContext]['type']['values'],
                true
            );
            if ($type !== false) {
                return 'GBAtemp ' . $type . ' Bridge';
            }
        }
        return parent::getName();
    }
}