1
0
mirror of https://github.com/RSS-Bridge/rss-bridge.git synced 2025-08-24 00:53:10 +02:00

fix: rewrite and improve caching (#3594)

This commit is contained in:
Dag
2023-09-10 21:50:15 +02:00
committed by GitHub
parent a786bbd4e0
commit 4b9f6f7e53
45 changed files with 993 additions and 1169 deletions

View File

@@ -33,6 +33,7 @@ class AO3Bridge extends BridgeAbstract
],
]
];
private $title;
public function collectData()
{
@@ -94,11 +95,12 @@ class AO3Bridge extends BridgeAbstract
$url = self::URI . "/works/$id/navigate";
$httpClient = RssBridge::getHttpClient();
$version = 'v0.0.1';
$response = $httpClient->request($url, [
'useragent' => 'rss-bridge bot (https://github.com/RSS-Bridge/rss-bridge)',
'useragent' => "rss-bridge $version (https://github.com/RSS-Bridge/rss-bridge)",
]);
$html = \str_get_html($response['body']);
$html = \str_get_html($response->getBody());
$html = defaultLinkTo($html, self::URI);
$this->title = $html->find('h2 a', 0)->plaintext;

View File

@@ -159,7 +159,7 @@ class BugzillaBridge extends BridgeAbstract
protected function getUser($user)
{
// Check if the user endpoint is available
if ($this->loadCacheValue($this->instance . 'userEndpointClosed', 86400)) {
if ($this->loadCacheValue($this->instance . 'userEndpointClosed')) {
return $user;
}

View File

@@ -114,18 +114,17 @@ class ElloBridge extends BridgeAbstract
private function getAPIKey()
{
$cache = RssBridge::getCache();
$cache->setScope('ElloBridge');
$cache->setKey(['key']);
$key = $cache->loadData();
$cacheKey = 'ElloBridge_key';
$apiKey = $cache->get($cacheKey);
if ($key == null) {
$keyInfo = getContents(self::URI . 'api/webapp-token') or
returnServerError('Unable to get token.');
$key = json_decode($keyInfo)->token->access_token;
$cache->saveData($key);
if (!$apiKey) {
$keyInfo = getContents(self::URI . 'api/webapp-token') or returnServerError('Unable to get token.');
$apiKey = json_decode($keyInfo)->token->access_token;
$ttl = 60 * 60 * 20;
$cache->set($cacheKey, $apiKey, $ttl);
}
return $key;
return $apiKey;
}
public function getName()

View File

@@ -99,23 +99,22 @@ class InstagramBridge extends BridgeAbstract
}
$cache = RssBridge::getCache();
$cache->setScope('InstagramBridge');
$cache->setKey([$username]);
$key = $cache->loadData();
$cacheKey = 'InstagramBridge_' . $username;
$pk = $cache->get($cacheKey);
if ($key == null) {
if (!$pk) {
$data = $this->getContents(self::URI . 'web/search/topsearch/?query=' . $username);
foreach (json_decode($data)->users as $user) {
if (strtolower($user->user->username) === strtolower($username)) {
$key = $user->user->pk;
$pk = $user->user->pk;
}
}
if ($key == null) {
if (!$pk) {
returnServerError('Unable to find username in search result.');
}
$cache->saveData($key);
$cache->set($cacheKey, $pk);
}
return $key;
return $pk;
}
public function collectData()

View File

@@ -100,7 +100,7 @@ class MastodonBridge extends BridgeAbstract
// We fetch the boosted content.
try {
$rtContent = $this->fetchAP($content['object']);
$rtUser = $this->loadCacheValue($rtContent['attributedTo'], 86400);
$rtUser = $this->loadCacheValue($rtContent['attributedTo']);
if (!isset($rtUser)) {
// We fetch the author, since we cannot always assume the format of the URL.
$user = $this->fetchAP($rtContent['attributedTo']);

View File

@@ -72,8 +72,30 @@ class RedditBridge extends BridgeAbstract
]
]
];
private CacheInterface $cache;
/**
 * Stores the cache instance returned by RssBridge::getCache() on the bridge.
 *
 * collectData() reads and writes the 'reddit_rate_limit' marker through this property.
 */
public function __construct()
{
$this->cache = RssBridge::getCache();
}
/**
 * Collects the feed items, short-circuiting while a rate-limit marker is cached.
 *
 * If a previous run stored the 'reddit_rate_limit' marker, no request is made and a
 * 429 HttpException is thrown straight away. Otherwise the real work is delegated to
 * collectDataInternal(); when that fails with a 429 the marker is stored so that
 * subsequent runs back off.
 *
 * @throws HttpException On a cached or fresh 429, and on any other HTTP failure
 *                       raised by collectDataInternal().
 */
public function collectData()
{
    $cacheKey = 'reddit_rate_limit';
    if ($this->cache->get($cacheKey)) {
        throw new HttpException('429 Too Many Requests', 429);
    }

    try {
        $this->collectDataInternal();
    } catch (HttpException $e) {
        if ($e->getCode() === 429) {
            // Third argument is presumably a TTL in seconds (16 minutes) - confirm against CacheInterface.
            $this->cache->set($cacheKey, true, 60 * 16);
        }
        // Rethrow unconditionally: previously only 429s were rethrown, so every other
        // HTTP error was swallowed here and the bridge silently produced an empty feed.
        throw $e;
    }
}
private function collectDataInternal(): void
{
$user = false;
$comments = false;

View File

@@ -36,7 +36,7 @@ class SoundCloudBridge extends BridgeAbstract
private $feedTitle = null;
private $feedIcon = null;
private $cache = null;
private CacheInterface $cache;
private $clientIdRegex = '/client_id.*?"(.+?)"/';
private $widgetRegex = '/widget-.+?\.js/';
@@ -44,8 +44,6 @@ class SoundCloudBridge extends BridgeAbstract
public function collectData()
{
$this->cache = RssBridge::getCache();
$this->cache->setScope('SoundCloudBridge');
$this->cache->setKey(['client_id']);
$res = $this->getUser($this->getInput('u'));
@@ -121,11 +119,9 @@ HTML;
private function getClientID()
{
$this->cache->setScope('SoundCloudBridge');
$this->cache->setKey(['client_id']);
$clientID = $this->cache->loadData();
$clientID = $this->cache->get('SoundCloudBridge_client_id');
if ($clientID == null) {
if (!$clientID) {
return $this->refreshClientID();
} else {
return $clientID;
@@ -151,10 +147,7 @@ HTML;
if (preg_match($this->clientIdRegex, $widgetJS, $matches)) {
$clientID = $matches[1];
$this->cache->setScope('SoundCloudBridge');
$this->cache->setKey(['client_id']);
$this->cache->saveData($clientID);
$this->cache->set('SoundCloudBridge_client_id', $clientID);
return $clientID;
}
}

View File

@@ -279,10 +279,9 @@ class SpotifyBridge extends BridgeAbstract
private function fetchAccessToken()
{
$cache = RssBridge::getCache();
$cacheKey = sprintf('%s:%s', $this->getInput('clientid'), $this->getInput('clientsecret'));
$cache->setScope('SpotifyBridge');
$cache->setKey([$cacheKey]);
$token = $cache->loadData(3600);
$cacheKey = sprintf('SpotifyBridge:%s:%s', $this->getInput('clientid'), $this->getInput('clientsecret'));
$token = $cache->get($cacheKey);
if ($token) {
$this->token = $token;
} else {
@@ -294,9 +293,8 @@ class SpotifyBridge extends BridgeAbstract
]);
$data = Json::decode($json);
$this->token = $data['access_token'];
$cache->setScope('SpotifyBridge');
$cache->setKey([$cacheKey]);
$cache->saveData($this->token);
$cache->set($cacheKey, $this->token, 3600);
}
}

View File

@@ -594,156 +594,4 @@ EOD;
{
return (intval($tweet1['id']) < intval($tweet2['id']) ? 1 : -1);
}
/**
 * Obtains an API key and a guest token and stores them, plus the matching
 * request headers, on the bridge.
 *
 * Fetching a fresh API key takes two requests (the twitter.com front page and the
 * main.js bundle it links to), which is why the key is cached. The guest token is
 * cached as a two-element array: [remaining uses, token].
 *
 * @param int $forceNew When truthy, ignore cached values and fetch a new API key and guest token.
 * @return array Two-element list: [$apiKey, $guestToken].
 */
private function getApiKey($forceNew = 0)
{
// Load the timestamp of the last refresh; initialise it to "now" when nothing is cached.
// NOTE(review): RssBridge::getCache() is called three times in this method. If it returns the
// same instance every time, each setScope()/setKey() pair re-targets that shared object, so the
// later $r_cache->saveData(time()) would write under whichever key was set last - confirm.
$r_cache = RssBridge::getCache();
$scope = 'TwitterBridge';
$r_cache->setScope($scope);
$r_cache->setKey(['refresh']);
$data = $r_cache->loadData();
$refresh = null;
if ($data === null) {
$refresh = time();
$r_cache->saveData($refresh);
} else {
$refresh = $data;
}
// NOTE(review): $cacheFactory is never read in this method.
$cacheFactory = new CacheFactory();
$cache = RssBridge::getCache();
$cache->setScope($scope);
$cache->setKey(['api_key']);
$data = $cache->loadData();
$apiKey = null;
// Re-fetch the API key when forced, when none is cached, or when the last refresh is older than GUEST_TOKEN_EXPIRY.
if ($forceNew || $data === null || (time() - $refresh) > self::GUEST_TOKEN_EXPIRY) {
$twitterPage = getContents('https://twitter.com');
$jsLink = false;
// Candidate locations of the main.js bundle; the first pattern that matches wins.
$jsMainRegexArray = [
'/(https:\/\/abs\.twimg\.com\/responsive-web\/web\/main\.[^\.]+\.js)/m',
'/(https:\/\/abs\.twimg\.com\/responsive-web\/web_legacy\/main\.[^\.]+\.js)/m',
'/(https:\/\/abs\.twimg\.com\/responsive-web\/client-web\/main\.[^\.]+\.js)/m',
'/(https:\/\/abs\.twimg\.com\/responsive-web\/client-web-legacy\/main\.[^\.]+\.js)/m',
];
foreach ($jsMainRegexArray as $jsMainRegex) {
if (preg_match_all($jsMainRegex, $twitterPage, $jsMainMatches, PREG_SET_ORDER, 0)) {
$jsLink = $jsMainMatches[0][0];
break;
}
}
if (!$jsLink) {
returnServerError('Could not locate main.js link');
}
$jsContent = getContents($jsLink);
$apiKeyRegex = '/([a-zA-Z0-9]{59}%[a-zA-Z0-9]{44})/m';
// NOTE(review): the result of preg_match_all() is not checked, so $apiKeyMatches[0][0]
// is read even when the bundle contains no match.
preg_match_all($apiKeyRegex, $jsContent, $apiKeyMatches, PREG_SET_ORDER, 0);
$apiKey = $apiKeyMatches[0][0];
$cache->saveData($apiKey);
} else {
$apiKey = $data;
}
$gt_cache = RssBridge::getCache();
$gt_cache->setScope($scope);
$gt_cache->setKey(['guest_token']);
$guestTokenUses = $gt_cache->loadData();
$guestToken = null;
// Request a new guest token when forced, when the cached entry is missing or malformed,
// when its use counter is exhausted, or when the last refresh is too old.
if (
$forceNew || $guestTokenUses === null || !is_array($guestTokenUses) || count($guestTokenUses) != 2
|| $guestTokenUses[0] <= 0 || (time() - $refresh) > self::GUEST_TOKEN_EXPIRY
) {
$guestToken = $this->getGuestToken($apiKey);
if ($guestToken === null) {
// No new token could be obtained: fail if nothing is cached, otherwise keep using the cached token.
if ($guestTokenUses === null) {
returnServerError('Could not parse guest token');
} else {
$guestToken = $guestTokenUses[1];
}
} else {
// Store the new token with a full use counter and record the refresh time.
$gt_cache->saveData([self::GUEST_TOKEN_USES, $guestToken]);
$r_cache->saveData(time());
}
} else {
// Cached token is still usable: consume one use and persist the decremented counter.
$guestTokenUses[0] -= 1;
$gt_cache->saveData($guestTokenUses);
$guestToken = $guestTokenUses[1];
}
$this->apiKey = $apiKey;
$this->guestToken = $guestToken;
$this->authHeaders = [
'authorization: Bearer ' . $apiKey,
'x-guest-token: ' . $guestToken,
];
return [$apiKey, $guestToken];
}
/**
 * Requests a guest token from the guest/activate endpoint.
 *
 * A guest token is distinct from the API key and, according to the original
 * author, appears to change more often than the key does.
 *
 * @param string $apiKey Bearer token sent in the authorization header.
 * @return mixed The guest_token field of the decoded response, or null when an Exception is thrown.
 */
private function getGuestToken($apiKey)
{
    $requestHeaders = ['authorization: Bearer ' . $apiKey];
    $curlOptions = [CURLOPT_POST => 1];

    try {
        $response = getContents(
            'https://api.twitter.com/1.1/guest/activate.json',
            $requestHeaders,
            $curlOptions,
            true
        );
        return json_decode($response['content'])->guest_token;
    } catch (Exception $e) {
        // Best effort: the caller treats null as "no new token available".
        return null;
    }
}
/**
 * Performs an API call against Twitter, refreshing credentials once on an auth failure.
 *
 * A 401 or 403 response triggers a forced getApiKey() refresh followed by a single
 * retry; any other HTTP error, or a second auth failure, is rethrown to the caller.
 *
 * @param string $api API entry point, appended to self::API_URI
 * @param array $params additional URI parameters
 * @return object json data
 * @throws HttpException When the request fails and no retry is left or applicable.
 */
private function makeApiCall($api, $params)
{
    $uri = self::API_URI . $api . '?' . http_build_query($params);
    $retriesLeft = 1;

    while (true) {
        try {
            $result = getContents($uri, $this->authHeaders, [], true);
            break;
        } catch (HttpException $e) {
            // Loose comparison on purpose: it mirrors the semantics of a switch statement.
            $isAuthFailure = in_array($e->getCode(), [401, 403]);
            if (!$isAuthFailure || !$retriesLeft) {
                throw $e;
            }
            $retriesLeft--;
            // Force new credentials; this also rebuilds $this->authHeaders for the retry.
            $this->getApiKey(1);
        }
    }

    return json_decode($result['content']);
}
}

View File

@@ -117,7 +117,7 @@ The default URI shows the Madara demo page.';
protected function getMangaInfo($url)
{
$url_cache = 'TitleInfo_' . preg_replace('/[^\w]/', '.', rtrim($url, '/'));
$cache = $this->loadCacheValue($url_cache, 86400);
$cache = $this->loadCacheValue($url_cache);
if (isset($cache)) {
return $cache;
}

View File

@@ -77,6 +77,138 @@ class YoutubeBridge extends BridgeAbstract
private $channel_name = '';
// Taken from VerifiedJoseph's BetterVideoRss repository.
const URI_REGEX = '/(https?:\/\/(?:www\.)?(?:[a-zA-Z0-9-.]{2,256}\.[a-z]{2,20})(\:[0-9]{2 ,4})?(?:\/[a-zA-Z0-9@:%_\+.,~#"\'!?&\/\/=\-*]+|\/)?)/ims'; //phpcs:ignore
private CacheInterface $cache;
/**
 * Stores the cache instance returned by RssBridge::getCache() on the bridge.
 *
 * collectData() reads and writes the 'youtube_rate_limit' marker through this property.
 */
public function __construct()
{
$this->cache = RssBridge::getCache();
}
/**
 * Builds the feed for whichever mode the inputs select.
 *
 * Modes, checked in this order:
 *  - 'u' (username), 'c' (channel id) or 'custom' (custom name): channel listing
 *  - 'p': playlist
 *  - 's': search
 * When none of these inputs is set, returnClientError() is called.
 *
 * Side effects: sets $this->request, $this->feeduri, $this->feedName and, in custom
 * mode, $this->iconURL. Items are produced through ytBridgeParseXmlFeed() or
 * parseJSONListing().
 */
private function collectDataInternal()
{
$xml = '';
$html = '';
$url_feed = '';
$url_listing = '';
if ($this->getInput('u')) {
/* User and Channel modes */
$this->request = $this->getInput('u');
$url_feed = self::URI . 'feeds/videos.xml?user=' . urlencode($this->request);
$url_listing = self::URI . 'user/' . urlencode($this->request) . '/videos';
} elseif ($this->getInput('c')) {
$this->request = $this->getInput('c');
$url_feed = self::URI . 'feeds/videos.xml?channel_id=' . urlencode($this->request);
$url_listing = self::URI . 'channel/' . urlencode($this->request) . '/videos';
} elseif ($this->getInput('custom')) {
// Custom names have no direct feed URL; it is read from the listing page's JSON below.
$this->request = $this->getInput('custom');
$url_listing = self::URI . urlencode($this->request) . '/videos';
}
if (!empty($url_feed) || !empty($url_listing)) {
$this->feeduri = $url_listing;
if (!empty($this->getInput('custom'))) {
$html = $this->ytGetSimpleHTMLDOM($url_listing);
$jsonData = $this->getJSONData($html);
$url_feed = $jsonData->metadata->channelMetadataRenderer->rssUrl;
$this->iconURL = $jsonData->metadata->channelMetadataRenderer->avatar->thumbnails[0]->url;
}
// skipFeeds() decides between the XML feed and the JSON embedded in the listing page.
if (!$this->skipFeeds()) {
$html = $this->ytGetSimpleHTMLDOM($url_feed);
$this->ytBridgeParseXmlFeed($html);
} else {
// In custom mode the listing page was already fetched and decoded above.
if (empty($this->getInput('custom'))) {
$html = $this->ytGetSimpleHTMLDOM($url_listing);
$jsonData = $this->getJSONData($html);
}
// NOTE(review): $channel_id is assigned here and below but never read in this method.
$channel_id = '';
if (isset($jsonData->contents)) {
$channel_id = $jsonData->metadata->channelMetadataRenderer->externalId;
$jsonData = $jsonData->contents->twoColumnBrowseResultsRenderer->tabs[1];
$jsonData = $jsonData->tabRenderer->content->richGridRenderer->contents;
// $jsonData = $jsonData->itemSectionRenderer->contents[0]->gridRenderer->items;
$this->parseJSONListing($jsonData);
} else {
returnServerError('Unable to get data from YouTube. Username/Channel: ' . $this->request);
}
}
$this->feedName = str_replace(' - YouTube', '', $html->find('title', 0)->plaintext);
} elseif ($this->getInput('p')) {
/* playlist mode */
// TODO: this mode makes a lot of excess video query requests.
// To make less requests, we need to cache following dictionary "videoId -> datePublished, duration"
// This cache will be used to find out, which videos to fetch
// to make feed of 15 items or more, if there a lot of videos published on that date.
$this->request = $this->getInput('p');
$url_feed = self::URI . 'feeds/videos.xml?playlist_id=' . urlencode($this->request);
$url_listing = self::URI . 'playlist?list=' . urlencode($this->request);
$html = $this->ytGetSimpleHTMLDOM($url_listing);
$jsonData = $this->getJSONData($html);
// TODO: this method returns only first 100 video items
// if it has more videos, playlistVideoListRenderer will have continuationItemRenderer as last element
$jsonData = $jsonData->contents->twoColumnBrowseResultsRenderer->tabs[0];
$jsonData = $jsonData->tabRenderer->content->sectionListRenderer->contents[0]->itemSectionRenderer;
$jsonData = $jsonData->contents[0]->playlistVideoListRenderer->contents;
$item_count = count($jsonData);
// The XML feed is used only for playlists of at most 15 listed items, when skipFeeds() is false
// and the feed fetch returns a truthy value; otherwise the listing JSON is parsed.
if ($item_count <= 15 && !$this->skipFeeds() && ($xml = $this->ytGetSimpleHTMLDOM($url_feed))) {
$this->ytBridgeParseXmlFeed($xml);
} else {
$this->parseJSONListing($jsonData);
}
$this->feedName = 'Playlist: ' . str_replace(' - YouTube', '', $html->find('title', 0)->plaintext);
// Sort descending by timestamp (larger timestamp first).
// NOTE(review): strtotime() is applied only when BOTH timestamps are non-integers; a mixed
// pair is subtracted as-is - confirm that mixed pairs cannot occur.
usort($this->items, function ($item1, $item2) {
if (!is_int($item1['timestamp']) && !is_int($item2['timestamp'])) {
$item1['timestamp'] = strtotime($item1['timestamp']);
$item2['timestamp'] = strtotime($item2['timestamp']);
}
return $item2['timestamp'] - $item1['timestamp'];
});
} elseif ($this->getInput('s')) {
/* search mode */
$this->request = $this->getInput('s');
$url_listing = self::URI
. 'results?search_query='
. urlencode($this->request)
. '&sp=CAI%253D';
$html = $this->ytGetSimpleHTMLDOM($url_listing);
$jsonData = $this->getJSONData($html);
$jsonData = $jsonData->contents->twoColumnSearchResultsRenderer->primaryContents;
$jsonData = $jsonData->sectionListRenderer->contents;
foreach ($jsonData as $data) {
// Search result includes some ads, have to filter them
if (isset($data->itemSectionRenderer->contents[0]->videoRenderer)) {
$jsonData = $data->itemSectionRenderer->contents;
break;
}
}
$this->parseJSONListing($jsonData);
$this->feeduri = $url_listing;
$this->feedName = 'Search: ' . $this->request;
} else {
/* no valid mode */
// NOTE(review): the message reads "either specify either", omits the custom-name mode and
// embeds a raw line break; it is a runtime string, so it is left untouched here.
returnClientError("You must either specify either:\n - YouTube
username (?u=...)\n - Channel id (?c=...)\n - Playlist id (?p=...)\n - Search (?s=...)");
}
}
/**
 * Collects the feed items, short-circuiting while a rate-limit marker is cached.
 *
 * If a previous run stored the 'youtube_rate_limit' marker, no request is made and a
 * 429 HttpException is thrown straight away. Otherwise the real work is delegated to
 * collectDataInternal(); when that fails with a 429 the marker is stored so that
 * subsequent runs back off.
 *
 * @throws HttpException On a cached or fresh 429, and on any other HTTP failure
 *                       raised by collectDataInternal().
 */
public function collectData()
{
    $cacheKey = 'youtube_rate_limit';
    if ($this->cache->get($cacheKey)) {
        throw new HttpException('429 Too Many Requests', 429);
    }

    try {
        $this->collectDataInternal();
    } catch (HttpException $e) {
        if ($e->getCode() === 429) {
            // Third argument is presumably a TTL in seconds (16 minutes) - confirm against CacheInterface.
            $this->cache->set($cacheKey, true, 60 * 16);
        }
        // Rethrow unconditionally: previously only 429s were rethrown, so every other
        // HTTP error was swallowed here and the bridge silently produced an empty feed.
        throw $e;
    }
}
private function ytBridgeQueryVideoInfo($vid, &$author, &$desc, &$time)
{
@@ -153,7 +285,8 @@ class YoutubeBridge extends BridgeAbstract
$item['timestamp'] = $time;
$item['uri'] = self::URI . 'watch?v=' . $vid;
if (!$thumbnail) {
$thumbnail = '0'; // Fallback to default thumbnail if there aren't any provided.
// Fallback to default thumbnail if there aren't any provided.
$thumbnail = '0';
}
$thumbnailUri = str_replace('/www.', '/img.', self::URI) . 'vi/' . $vid . '/' . $thumbnail . '.jpg';
$item['content'] = '<a href="' . $item['uri'] . '"><img src="' . $thumbnailUri . '" /></a><br />' . $desc;
@@ -315,111 +448,6 @@ class YoutubeBridge extends BridgeAbstract
}
}
/**
 * Builds the feed for whichever mode the inputs select.
 *
 * Modes, checked in this order: 'u' (username), 'c' (channel id) or 'custom'
 * (custom name) for a channel listing, then 'p' for a playlist, then 's' for a
 * search. When none of these inputs is set, returnClientError() is called.
 *
 * Side effects: sets $this->request, $this->feeduri, $this->feedName and, in custom
 * mode, $this->iconURL. Items are produced through ytBridgeParseXmlFeed() or
 * parseJSONListing().
 */
public function collectData()
{
$xml = '';
$html = '';
$url_feed = '';
$url_listing = '';
if ($this->getInput('u')) { /* User and Channel modes */
$this->request = $this->getInput('u');
$url_feed = self::URI . 'feeds/videos.xml?user=' . urlencode($this->request);
$url_listing = self::URI . 'user/' . urlencode($this->request) . '/videos';
} elseif ($this->getInput('c')) {
$this->request = $this->getInput('c');
$url_feed = self::URI . 'feeds/videos.xml?channel_id=' . urlencode($this->request);
$url_listing = self::URI . 'channel/' . urlencode($this->request) . '/videos';
} elseif ($this->getInput('custom')) {
// Custom names have no direct feed URL; it is read from the listing page's JSON below.
$this->request = $this->getInput('custom');
$url_listing = self::URI . urlencode($this->request) . '/videos';
}
if (!empty($url_feed) || !empty($url_listing)) {
$this->feeduri = $url_listing;
if (!empty($this->getInput('custom'))) {
$html = $this->ytGetSimpleHTMLDOM($url_listing);
$jsonData = $this->getJSONData($html);
$url_feed = $jsonData->metadata->channelMetadataRenderer->rssUrl;
$this->iconURL = $jsonData->metadata->channelMetadataRenderer->avatar->thumbnails[0]->url;
}
// skipFeeds() decides between the XML feed and the JSON embedded in the listing page.
if (!$this->skipFeeds()) {
$html = $this->ytGetSimpleHTMLDOM($url_feed);
$this->ytBridgeParseXmlFeed($html);
} else {
// In custom mode the listing page was already fetched and decoded above.
if (empty($this->getInput('custom'))) {
$html = $this->ytGetSimpleHTMLDOM($url_listing);
$jsonData = $this->getJSONData($html);
}
// NOTE(review): $channel_id is assigned here and below but never read in this method.
$channel_id = '';
if (isset($jsonData->contents)) {
$channel_id = $jsonData->metadata->channelMetadataRenderer->externalId;
$jsonData = $jsonData->contents->twoColumnBrowseResultsRenderer->tabs[1];
$jsonData = $jsonData->tabRenderer->content->richGridRenderer->contents;
// $jsonData = $jsonData->itemSectionRenderer->contents[0]->gridRenderer->items;
$this->parseJSONListing($jsonData);
} else {
returnServerError('Unable to get data from YouTube. Username/Channel: ' . $this->request);
}
}
$this->feedName = str_replace(' - YouTube', '', $html->find('title', 0)->plaintext);
} elseif ($this->getInput('p')) { /* playlist mode */
// TODO: this mode makes a lot of excess video query requests.
// To make less requests, we need to cache following dictionary "videoId -> datePublished, duration"
// This cache will be used to find out, which videos to fetch
// to make feed of 15 items or more, if there a lot of videos published on that date.
$this->request = $this->getInput('p');
$url_feed = self::URI . 'feeds/videos.xml?playlist_id=' . urlencode($this->request);
$url_listing = self::URI . 'playlist?list=' . urlencode($this->request);
$html = $this->ytGetSimpleHTMLDOM($url_listing);
$jsonData = $this->getJSONData($html);
// TODO: this method returns only first 100 video items
// if it has more videos, playlistVideoListRenderer will have continuationItemRenderer as last element
$jsonData = $jsonData->contents->twoColumnBrowseResultsRenderer->tabs[0];
$jsonData = $jsonData->tabRenderer->content->sectionListRenderer->contents[0]->itemSectionRenderer;
$jsonData = $jsonData->contents[0]->playlistVideoListRenderer->contents;
$item_count = count($jsonData);
// The XML feed is used only for playlists of at most 15 listed items, when skipFeeds() is false
// and the feed fetch returns a truthy value; otherwise the listing JSON is parsed.
if ($item_count <= 15 && !$this->skipFeeds() && ($xml = $this->ytGetSimpleHTMLDOM($url_feed))) {
$this->ytBridgeParseXmlFeed($xml);
} else {
$this->parseJSONListing($jsonData);
}
$this->feedName = 'Playlist: ' . str_replace(' - YouTube', '', $html->find('title', 0)->plaintext); // feedName will be used by getName()
// Sort descending by timestamp (larger timestamp first).
// NOTE(review): strtotime() is applied only when BOTH timestamps are non-integers; a mixed
// pair is subtracted as-is - confirm that mixed pairs cannot occur.
usort($this->items, function ($item1, $item2) {
if (!is_int($item1['timestamp']) && !is_int($item2['timestamp'])) {
$item1['timestamp'] = strtotime($item1['timestamp']);
$item2['timestamp'] = strtotime($item2['timestamp']);
}
return $item2['timestamp'] - $item1['timestamp'];
});
} elseif ($this->getInput('s')) { /* search mode */
$this->request = $this->getInput('s');
$url_listing = self::URI
. 'results?search_query='
. urlencode($this->request)
. '&sp=CAI%253D';
$html = $this->ytGetSimpleHTMLDOM($url_listing);
$jsonData = $this->getJSONData($html);
$jsonData = $jsonData->contents->twoColumnSearchResultsRenderer->primaryContents;
$jsonData = $jsonData->sectionListRenderer->contents;
foreach ($jsonData as $data) { // Search result includes some ads, have to filter them
if (isset($data->itemSectionRenderer->contents[0]->videoRenderer)) {
$jsonData = $data->itemSectionRenderer->contents;
break;
}
}
$this->parseJSONListing($jsonData);
$this->feeduri = $url_listing;
$this->feedName = 'Search: ' . $this->request; // feedName will be used by getName()
} else { /* no valid mode */
// NOTE(review): the message reads "either specify either", omits the custom-name mode and
// embeds a raw line break; it is a runtime string, so it is left untouched here.
returnClientError("You must either specify either:\n - YouTube
username (?u=...)\n - Channel id (?c=...)\n - Playlist id (?p=...)\n - Search (?s=...)");
}
}
private function skipFeeds()
{
return ($this->getInput('duration_min') || $this->getInput('duration_max'));
@@ -438,14 +466,13 @@ class YoutubeBridge extends BridgeAbstract
public function getName()
{
// Name depends on queriedContext:
switch ($this->queriedContext) {
case 'By username':
case 'By channel id':
case 'By custom name':
case 'By playlist Id':
case 'Search result':
return htmlspecialchars_decode($this->feedName) . ' - YouTube'; // We already know it's a bridge, right?
return htmlspecialchars_decode($this->feedName) . ' - YouTube';
default:
return parent::getName();
}