1
0
mirror of https://github.com/RSS-Bridge/rss-bridge.git synced 2025-01-16 21:58:21 +01:00

[ScribbleHubBridge] Get best-effort information during 403s (#4365)

This commit is contained in:
July 2024-12-11 23:43:17 -05:00 committed by GitHub
parent 9126b0f982
commit d2370320e9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -54,44 +54,72 @@ class ScribbleHubBridge extends FeedExpander
$this->collectExpandableDatas($url); $this->collectExpandableDatas($url);
} }
protected $author = '';
/**
 * Collects one feed item per `.search_main_box` element found on the page at $url.
 *
 * Each item is first filled from the listing entry itself (author, cover image,
 * title, link, last-updated timestamp, genres and a trimmed description). The
 * item's own page and then the page linked from `.read_buttons a` are fetched to
 * enrich it. If either fetch throws an HttpException with code 403 or 429, the
 * item gathered so far is still added to $this->items and the loop moves on to
 * the next entry; an HttpException with any other code is rethrown.
 *
 * NOTE(review): this text appears to be a two-column diff rendering (see the
 * hunk header above) - where a line carries two statements, the first looks
 * like the pre-change code and the second the post-change code. Confirm
 * against the real file before relying on exact statement order.
 *
 * @param string $url URL of the listing page to scrape.
 */
private function collectList($url) private function collectList($url)
{ {
$html = getSimpleHTMLDOMCached($url); $html = getSimpleHTMLDOMCached($url);
foreach ($html->find('.search_main_box') as $element) { foreach ($html->find('.search_main_box') as $element) {
$item = []; $item = [];
$item['author'] = $element->find('[title="Author"]', 0)->plaintext;
$item['enclosures'] = [$element->find('.search_img img', 0)->src];
$title = $element->find('.search_title a', 0); $title = $element->find('.search_title a', 0);
$item['title'] = $title->plaintext; $item['title'] = $title->plaintext;
$item['uri'] = $title->href; $item['uri'] = $title->href;
$item['uid'] = $item['uri'];
$strdate = $element->find('[title="Last Updated"]', 0)->plaintext; $strdate = $element->find('[title="Last Updated"]', 0)->plaintext;
$item['timestamp'] = strtotime($strdate); $item['timestamp'] = strtotime($strdate);
$item['uid'] = $item['uri'];
$details = getSimpleHTMLDOMCached($item['uri']); foreach ($element->find('.fic_genre') as $tag) {
$item['enclosures'][] = $details->find('.fic_image img', 0)->src;
$item['content'] = $details->find('.wi_fic_desc', 0);
foreach ($details->find('.fic_genre') as $tag) {
$item['categories'][] = $tag->plaintext; $item['categories'][] = $tag->plaintext;
} }
// Start with the description taken from the listing entry itself, so the item
// still has content if the follow-up page requests below fail. The child
// elements of the parsed fragment's first node are removed from it.
$item['content'] = str_get_html($element->find('.search_body', 0));
foreach ($item['content']->firstChild()->children() as $child) {
$child->remove();
}
try {
$details = getSimpleHTMLDOMCached($item['uri']);
} catch (HttpException $e) {
// 403 Forbidden or 429 Too Many Requests is treated as an anti-bot response:
// keep the listing-only item and move on to the next entry.
if ($e->getCode() === 403 || $e->getCode() === 429) {
$this->items[] = $item;
continue;
}
throw $e;
}
$item['enclosures'] = [$details->find('.fic_image img', 0)->src];
$item['content'] = $details->find('.wi_fic_desc', 0);
foreach ($details->find('.stag') as $tag) { foreach ($details->find('.stag') as $tag) {
$item['categories'][] = $tag->plaintext; $item['categories'][] = $tag->plaintext;
} }
$read_url = $details->find('.read_buttons a', 0)->href; $read_url = $details->find('.read_buttons a', 0)->href;
$read_html = getSimpleHTMLDOMCached($read_url); $item['comments'] = $read_url . '#comments';
$item['content'] .= '<hr><h3>'; try {
$read_html = getSimpleHTMLDOMCached($read_url);
} catch (HttpException $e) {
// 403 Forbidden or 429 Too Many Requests is treated as an anti-bot response:
// keep the item as enriched so far (no chapter title/body) and move on.
if ($e->getCode() === 403 || $e->getCode() === 429) {
$this->items[] = $item;
continue;
}
throw $e;
}
$item['content'] .= "<hr><h3><a href=\"$read_url\">";
$item['content'] .= $read_html->find('.chapter-title', 0); $item['content'] .= $read_html->find('.chapter-title', 0);
$item['content'] .= '</h3>'; $item['content'] .= '</a></h3>';
$item['content'] .= $read_html->find('#chp_raw', 0); $item['content'] .= $read_html->find('#chp_raw', 0);
$this->items[] = $item; $this->items[] = $item;
} }
} }
protected $author = '';
protected function parseItem(array $item) protected function parseItem(array $item)
{ {
//For series, filter out other series from 'All' feed //For series, filter out other series from 'All' feed