mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2025-01-17 22:28:22 +01:00
4f6277b6b5
The author name is parsed by searching a string within the entire HTML document: $author = $html->innertext; $author = substr($author, strpos($author, '"author=') + 8); $author = substr($author, 0, strpos($author, '\u0026')); This solution will return big portions of the HTML document if the strpos function returns zero (not found). This commit replaces the previous implementation by searching for a specific script tag and making use of the JSON data inside it. References #580
221 lines
7.2 KiB
PHP
221 lines
7.2 KiB
PHP
<?php
|
|
/**
 * RssBridgeYoutube
 * Returns the newest videos
 * WARNING: to parse big playlists (over ~90 videos), you need to edit simple_html_dom.php:
 * change: define('MAX_FILE_SIZE', 600000);
 * into: define('MAX_FILE_SIZE', 900000); (or more)
 */
|
|
class YoutubeBridge extends BridgeAbstract {

    const NAME = 'YouTube Bridge';
    const URI = 'https://www.youtube.com/';
    const CACHE_TIMEOUT = 10800; // 3h
    const DESCRIPTION = 'Returns the 10 newest videos by username/channel/playlist or search';
    const MAINTAINER = 'mitsukarenai';

    // One entry per query context; the context names are matched in getName().
    const PARAMETERS = array(
        'By username' => array(
            'u' => array(
                'name' => 'username',
                'exampleValue' => 'test',
                'required' => true
            )
        ),
        'By channel id' => array(
            'c' => array(
                'name' => 'channel id',
                'exampleValue' => "15",
                'required' => true
            )
        ),
        'By playlist Id' => array(
            'p' => array(
                'name' => 'playlist id',
                'exampleValue' => "15"
            )
        ),
        'Search result' => array(
            's' => array(
                'name' => 'search keyword',
                'exampleValue' => 'test'
            ),
            'pa' => array(
                'name' => 'page',
                'type' => 'number',
                'exampleValue' => 1
            )
        )
    );

    // NOTE(review): $this->feedName and $this->request are assigned below but
    // never declared in this class. They are left undeclared on purpose because
    // the parent class is not visible from here - confirm whether BridgeAbstract
    // declares them before adding property declarations.

    /**
     * Loads the watch page of a video and fills in author, description and
     * publication time through the by-reference parameters.
     *
     * An output parameter keeps the value supplied by the caller when the page
     * cannot be loaded, when the page carries the unavailable marker, or when
     * the element it is read from is missing.
     *
     * @param string $vid    Video id, appended to "watch?v="
     * @param string $author Receives the name of the first itemListElement entry
     *                       found in a JSON script tag
     * @param string $desc   Receives the inner HTML of div#watch-description-text
     * @param int    $time   Receives strtotime() of the datePublished meta tag
     */
    private function ytBridgeQueryVideoInfo($vid, &$author, &$desc, &$time){
        $html = $this->ytGetSimpleHTMLDOM(self::URI . "watch?v=$vid");

        // Nothing to parse if the request did not yield a document.
        if(!$html) {
            return;
        }

        // Skip unavailable videos. The result of strpos() has to be compared
        // against false explicitly: negating it returned early for every page
        // that does NOT contain the marker, which is the opposite of the intent.
        if(strpos($html->innertext, 'IS_UNAVAILABLE_PAGE') !== false) {
            return;
        }

        // The author name is taken from a script tag whose content is a JSON
        // object with an itemListElement list.
        foreach($html->find('script') as $script) {
            $data = trim($script->innertext);

            if(strpos($data, '{') !== 0) {
                continue; // Wrong script: content is not a JSON object
            }

            $json = json_decode($data);

            if(!isset($json->itemListElement)
            || !is_array($json->itemListElement)
            || !isset($json->itemListElement[0]->item->name)) {
                continue; // Wrong script: not the expected structure
            }

            $author = $json->itemListElement[0]->item->name;
        }

        $descriptionNode = $html->find('div#watch-description-text', 0);
        if(!is_null($descriptionNode)) {
            $desc = $descriptionNode->innertext;
        }

        $publishedNode = $html->find('meta[itemprop=datePublished]', 0);
        if(!is_null($publishedNode)) {
            $time = strtotime($publishedNode->getAttribute('content'));
        }
    }

    /**
     * Appends one video to $this->items.
     *
     * @param string $vid    Video id
     * @param string $title  Item title
     * @param string $author Item author
     * @param string $desc   HTML placed below the linked thumbnail
     * @param int    $time   Item timestamp
     */
    private function ytBridgeAddItem($vid, $title, $author, $desc, $time){
        $uri = self::URI . 'watch?v=' . $vid;

        // Thumbnails are addressed on the img. host instead of www.
        $thumbnailUri = str_replace('/www.', '/img.', self::URI) . 'vi/' . $vid . '/0.jpg';

        $this->items[] = array(
            'id' => $vid,
            'title' => $title,
            'author' => $author,
            'timestamp' => $time,
            'uri' => $uri,
            'content' => '<a href="' . $uri . '"><img src="' . $thumbnailUri . '" /></a><br />' . $desc
        );
    }

    /**
     * Adds one item per <entry> of an already parsed feed document and stores
     * the feed title in $this->feedName.
     *
     * @param object $xml Document returned by ytGetSimpleHTMLDOM() for a feed URL
     */
    private function ytBridgeParseXmlFeed($xml) {
        foreach($xml->find('entry') as $element) {
            $title = $this->ytBridgeFixTitle($element->find('title', 0)->plaintext);
            $author = $element->find('name', 0)->plaintext;

            // An entry without a description element gets an empty description
            // instead of failing on a null dereference.
            $descriptionNode = $element->find('media:description', 0);
            $desc = is_null($descriptionNode) ? '' : $descriptionNode->innertext;

            // Make sure the description is easy on the eye :)
            // Escape first, then turn line breaks into <br /> and URLs into links.
            $desc = htmlspecialchars($desc);
            $desc = nl2br($desc);
            $desc = preg_replace('/(http[s]{0,1}\:\/\/[a-zA-Z0-9.\/\?\&=\-_]{4,})/ims',
                '<a href="$1" target="_blank">$1</a> ',
                $desc);

            $vid = str_replace('yt:video:', '', $element->find('id', 0)->plaintext);
            $time = strtotime($element->find('published', 0)->plaintext);

            // Entries whose id mentions googleads are not added.
            if(strpos($vid, 'googleads') === false) {
                $this->ytBridgeAddItem($vid, $title, $author, $desc, $time);
            }
        }

        // feedName will be used by getName()
        $this->feedName = $this->ytBridgeFixTitle($xml->find('feed > title', 0)->plaintext);
    }

    /**
     * Adds up to 10 videos found in an HTML listing page. Author, description
     * and time of each video are looked up on its watch page.
     *
     * @param object $html             Parsed listing page
     * @param string $element_selector Selector matching one element per video
     * @param string $title_selector   Selector of the title node inside such an element
     */
    private function ytBridgeParseHtmlListing($html, $element_selector, $title_selector){
        $limit = 10;
        $count = 0;

        foreach($html->find($element_selector) as $element) {
            // Stop as soon as the limit is reached instead of walking the rest
            // of the listing without doing anything.
            if($count >= $limit) {
                break;
            }

            $link = $element->find('a', 0);
            $titleNode = $element->find($title_selector, 0);
            if(is_null($link) || is_null($titleNode)) {
                continue; // Element lacks the link or the title node
            }

            $author = '';
            $desc = '';
            $time = 0;

            // Reduce "/watch?v=<id>&..." to the bare video id.
            $vid = str_replace('/watch?v=', '', $link->href);
            $vid = substr($vid, 0, strpos($vid, '&') ?: strlen($vid));

            $title = $this->ytBridgeFixTitle($titleNode->plaintext);

            if($title !== '[Private Video]' && strpos($vid, 'googleads') === false) {
                $this->ytBridgeQueryVideoInfo($vid, $author, $desc, $time);
                $this->ytBridgeAddItem($vid, $title, $author, $desc, $time);
                $count++;
            }
        }
    }

    /**
     * Decodes HTML entities in a title.
     *
     * @param string $title Title as found in the document
     * @return string Title with entities decoded
     */
    private function ytBridgeFixTitle($title) {
        // Convert numeric entities (such as &#1234;) as well as named ones
        // (such as &quot;) to UTF-8; ENT_QUOTES covers both quote styles.
        return html_entity_decode($title, ENT_QUOTES, 'UTF-8');
    }

    /**
     * Calls getSimpleHTMLDOM() with the fixed set of options used by this
     * bridge. The arguments are passed positionally, in the order shown.
     *
     * @param string $url URL to load
     * @return mixed Whatever getSimpleHTMLDOM() returns; callers treat a falsy
     *               value as a failed request
     */
    private function ytGetSimpleHTMLDOM($url){
        return getSimpleHTMLDOM(
            $url,
            false,                  // use_include_path
            null,                   // context
            0,                      // offset
            null,                   // maxLen
            true,                   // lowercase
            true,                   // forceTagsClosed
            DEFAULT_TARGET_CHARSET, // target_charset
            false,                  // stripRN
            DEFAULT_BR_TEXT,        // defaultBRText
            DEFAULT_SPAN_TEXT       // defaultSpanText
        );
    }

    /**
     * Collects the items for the requested mode. The modes are tried in this
     * order: username (u), channel id (c), playlist (p), search (s).
     *
     * Username and channel modes try the XML feed first and fall back to the
     * HTML listing. Playlist and search modes always parse the HTML listing.
     */
    public function collectData(){
        $url_feed = '';
        $url_listing = '';

        if($this->getInput('u')) { /* User and Channel modes */
            $this->request = $this->getInput('u');
            $url_feed = self::URI . 'feeds/videos.xml?user=' . urlencode($this->request);
            $url_listing = self::URI . 'user/' . urlencode($this->request) . '/videos';
        } elseif($this->getInput('c')) {
            $this->request = $this->getInput('c');
            $url_feed = self::URI . 'feeds/videos.xml?channel_id=' . urlencode($this->request);
            $url_listing = self::URI . 'channel/' . urlencode($this->request) . '/videos';
        }

        if(!empty($url_feed) && !empty($url_listing)) {
            if($xml = $this->ytGetSimpleHTMLDOM($url_feed)) {
                $this->ytBridgeParseXmlFeed($xml);
            } elseif($html = $this->ytGetSimpleHTMLDOM($url_listing)) {
                $this->ytBridgeParseHtmlListing($html, 'li.channels-content-item', 'h3');
            } else {
                returnServerError("Could not request YouTube. Tried:\n - $url_feed\n - $url_listing");
            }
        } elseif($this->getInput('p')) { /* playlist mode */
            $this->request = $this->getInput('p');
            $url_listing = self::URI . 'playlist?list=' . urlencode($this->request);

            $html = $this->ytGetSimpleHTMLDOM($url_listing)
                or returnServerError("Could not request YouTube. Tried:\n - $url_listing");

            $this->ytBridgeParseHtmlListing($html, 'tr.pl-video', '.pl-video-title a');

            // feedName will be used by getName()
            $this->feedName = 'Playlist: '
                . str_replace(' - YouTube', '', $html->find('title', 0)->plaintext);
        } elseif($this->getInput('s')) { /* search mode */
            $this->request = $this->getInput('s');

            // Page defaults to 1; any non-digit characters are stripped from the input.
            $page = 1;
            if($this->getInput('pa')) {
                $page = (int)preg_replace("/[^0-9]/", '', $this->getInput('pa'));
            }

            $url_listing = self::URI
                . 'results?search_query='
                . urlencode($this->request)
                . '&page='
                . $page
                . '&filters=video&search_sort=video_date_uploaded';

            $html = $this->ytGetSimpleHTMLDOM($url_listing)
                or returnServerError("Could not request YouTube. Tried:\n - $url_listing");

            $this->ytBridgeParseHtmlListing($html, 'div.yt-lockup', 'h3');

            // feedName will be used by getName()
            $this->feedName = 'Search: '
                . str_replace(' - YouTube', '', $html->find('title', 0)->plaintext);
        } else { /* no valid mode */
            returnClientError("You must specify either:\n"
                . " - YouTube username (?u=...)\n"
                . " - Channel id (?c=...)\n"
                . " - Playlist id (?p=...)\n"
                . " - Search (?s=...)");
        }
    }

    /**
     * Returns "<feedName> - YouTube" for the four query contexts once a feed
     * name has been collected, and the parent's name in every other case.
     *
     * @return string
     */
    public function getName(){
        // Name depends on queriedContext:
        switch($this->queriedContext) {
        case 'By username':
        case 'By channel id':
        case 'By playlist Id':
        case 'Search result':
            // Without a collected feed name the result would be the bare
            // string ' - YouTube'; fall back to the parent's name instead.
            if(!isset($this->feedName) || $this->feedName === '') {
                return parent::getName();
            }
            return $this->feedName . ' - YouTube'; // We already know it's a bridge, right?
        default:
            return parent::getName();
        }
    }
}
|