mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2025-01-16 13:50:01 +01:00
[ScribbleHubBridge] Get best-effort information during 403s (#4365)
This commit is contained in:
parent
9126b0f982
commit
d2370320e9
@ -54,44 +54,72 @@ class ScribbleHubBridge extends FeedExpander
|
||||
$this->collectExpandableDatas($url);
|
||||
}
|
||||
|
||||
protected $author = '';
|
||||
|
||||
private function collectList($url)
|
||||
{
|
||||
$html = getSimpleHTMLDOMCached($url);
|
||||
foreach ($html->find('.search_main_box') as $element) {
|
||||
$item = [];
|
||||
|
||||
$item['author'] = $element->find('[title="Author"]', 0)->plaintext;
|
||||
$item['enclosures'] = [$element->find('.search_img img', 0)->src];
|
||||
$title = $element->find('.search_title a', 0);
|
||||
$item['title'] = $title->plaintext;
|
||||
$item['uri'] = $title->href;
|
||||
$item['uid'] = $item['uri'];
|
||||
|
||||
$strdate = $element->find('[title="Last Updated"]', 0)->plaintext;
|
||||
$item['timestamp'] = strtotime($strdate);
|
||||
$item['uid'] = $item['uri'];
|
||||
|
||||
$details = getSimpleHTMLDOMCached($item['uri']);
|
||||
$item['enclosures'][] = $details->find('.fic_image img', 0)->src;
|
||||
$item['content'] = $details->find('.wi_fic_desc', 0);
|
||||
|
||||
foreach ($details->find('.fic_genre') as $tag) {
|
||||
foreach ($element->find('.fic_genre') as $tag) {
|
||||
$item['categories'][] = $tag->plaintext;
|
||||
}
|
||||
|
||||
// Get minimal description in case further requests fail
|
||||
$item['content'] = str_get_html($element->find('.search_body', 0));
|
||||
foreach ($item['content']->firstChild()->children() as $child) {
|
||||
$child->remove();
|
||||
}
|
||||
|
||||
try {
|
||||
$details = getSimpleHTMLDOMCached($item['uri']);
|
||||
} catch (HttpException $e) {
|
||||
// 403 Forbidden, This means we got anti-bot response
|
||||
if ($e->getCode() === 403 || $e->getCode() === 429) {
|
||||
$this->items[] = $item;
|
||||
continue;
|
||||
}
|
||||
throw $e;
|
||||
}
|
||||
$item['enclosures'] = [$details->find('.fic_image img', 0)->src];
|
||||
$item['content'] = $details->find('.wi_fic_desc', 0);
|
||||
|
||||
foreach ($details->find('.stag') as $tag) {
|
||||
$item['categories'][] = $tag->plaintext;
|
||||
}
|
||||
|
||||
$read_url = $details->find('.read_buttons a', 0)->href;
|
||||
$read_html = getSimpleHTMLDOMCached($read_url);
|
||||
$item['content'] .= '<hr><h3>';
|
||||
$item['comments'] = $read_url . '#comments';
|
||||
try {
|
||||
$read_html = getSimpleHTMLDOMCached($read_url);
|
||||
} catch (HttpException $e) {
|
||||
// 403 Forbidden, This means we got anti-bot response
|
||||
if ($e->getCode() === 403 || $e->getCode() === 429) {
|
||||
$this->items[] = $item;
|
||||
continue;
|
||||
}
|
||||
throw $e;
|
||||
}
|
||||
$item['content'] .= "<hr><h3><a href=\"$read_url\">";
|
||||
$item['content'] .= $read_html->find('.chapter-title', 0);
|
||||
$item['content'] .= '</h3>';
|
||||
$item['content'] .= '</a></h3>';
|
||||
$item['content'] .= $read_html->find('#chp_raw', 0);
|
||||
|
||||
$this->items[] = $item;
|
||||
}
|
||||
}
|
||||
|
||||
protected $author = '';
|
||||
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
//For series, filter out other series from 'All' feed
|
||||
|
Loading…
x
Reference in New Issue
Block a user