1
0
mirror of https://github.com/RSS-Bridge/rss-bridge.git synced 2025-01-16 13:50:01 +01:00

[ScribbleHubBridge] Get best-effort information during 403s (#4365)

This commit is contained in:
July 2024-12-11 23:43:17 -05:00 committed by GitHub
parent 9126b0f982
commit d2370320e9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -54,44 +54,72 @@ class ScribbleHubBridge extends FeedExpander
$this->collectExpandableDatas($url);
}
protected $author = '';
/**
 * Builds one feed item per `.search_main_box` element of the listing page
 * at $url and appends each item to $this->items.
 *
 * For every result the listing markup supplies author, title, link, date and
 * a minimal description; the story page and its first "read" page are then
 * fetched to enrich the item. When one of those extra requests fails with
 * HTTP 403 or 429 the partially filled item is kept instead of aborting.
 *
 * NOTE(review): this text looks like a rendered diff hunk in which removed
 * and added lines are interleaved without +/- markers. Several statements
 * appear in both an old and a new form, and the braces do not balance (two
 * `.fic_genre` foreach openers share one closing brace). Reconcile against
 * the repository before treating this as runnable code.
 *
 * @param string $url Listing page handed to getSimpleHTMLDOMCached()
 *                    (presumably an absolute URL - confirm with callers).
 */
private function collectList($url)
{
$html = getSimpleHTMLDOMCached($url);
foreach ($html->find('.search_main_box') as $element) {
$item = [];
// Fields available directly on the listing entry.
$item['author'] = $element->find('[title="Author"]', 0)->plaintext;
$item['enclosures'] = [$element->find('.search_img img', 0)->src];
$title = $element->find('.search_title a', 0);
$item['title'] = $title->plaintext;
$item['uri'] = $title->href;
// NOTE(review): 'uid' is assigned the same value again three lines below;
// presumably the old and new position of a single moved line.
$item['uid'] = $item['uri'];
$strdate = $element->find('[title="Last Updated"]', 0)->plaintext;
$item['timestamp'] = strtotime($strdate);
$item['uid'] = $item['uri'];
// NOTE(review): the next three statements fetch and read the story page
// without any error handling and are repeated inside the try block further
// down; they look like the pre-change version of that code.
$details = getSimpleHTMLDOMCached($item['uri']);
$item['enclosures'][] = $details->find('.fic_image img', 0)->src;
$item['content'] = $details->find('.wi_fic_desc', 0);
// NOTE(review): two loop headers for one body/closing brace - the first reads
// genres from the story page, the second from the listing entry. Presumably
// only one of them belongs to the final code.
foreach ($details->find('.fic_genre') as $tag) {
foreach ($element->find('.fic_genre') as $tag) {
$item['categories'][] = $tag->plaintext;
}
// Get minimal description in case further requests fail
// The `.search_body` markup is re-parsed into its own DOM, then every child
// element of its first element is removed.
$item['content'] = str_get_html($element->find('.search_body', 0));
foreach ($item['content']->firstChild()->children() as $child) {
$child->remove();
}
try {
$details = getSimpleHTMLDOMCached($item['uri']);
} catch (HttpException $e) {
// 403 (Forbidden) or 429 (Too Many Requests): treated as an anti-bot
// response, so the item built from the listing alone is kept and the loop
// moves on to the next result. Any other HttpException is rethrown.
if ($e->getCode() === 403 || $e->getCode() === 429) {
$this->items[] = $item;
continue;
}
throw $e;
}
// Story page is available: overwrite the listing thumbnail and the minimal
// description with the story page's cover image and `.wi_fic_desc` node.
$item['enclosures'] = [$details->find('.fic_image img', 0)->src];
$item['content'] = $details->find('.wi_fic_desc', 0);
// Each `.stag` element on the story page is appended as a further category.
foreach ($details->find('.stag') as $tag) {
$item['categories'][] = $tag->plaintext;
}
// Target of the first link inside `.read_buttons` on the story page.
$read_url = $details->find('.read_buttons a', 0)->href;
// NOTE(review): the unguarded fetch and the plain '<hr><h3>' opener below are
// repeated in guarded / linked form further down; they look like pre-change lines.
$read_html = getSimpleHTMLDOMCached($read_url);
$item['content'] .= '<hr><h3>';
$item['comments'] = $read_url . '#comments';
try {
$read_html = getSimpleHTMLDOMCached($read_url);
} catch (HttpException $e) {
// Same fallback as for the story page: on 403 or 429 keep what has been
// collected so far and continue with the next result; rethrow anything else.
if ($e->getCode() === 403 || $e->getCode() === 429) {
$this->items[] = $item;
continue;
}
throw $e;
}
// Append a heading that links to the read page, followed by the `#chp_raw`
// node of that page.
$item['content'] .= "<hr><h3><a href=\"$read_url\">";
$item['content'] .= $read_html->find('.chapter-title', 0);
// NOTE(review): two closers follow ('</h3>' and '</a></h3>'); presumably the
// old and new version of the same line.
$item['content'] .= '</h3>';
$item['content'] .= '</a></h3>';
$item['content'] .= $read_html->find('#chp_raw', 0);
$this->items[] = $item;
}
}
protected $author = '';
protected function parseItem(array $item)
{
//For series, filter out other series from 'All' feed