mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2025-01-16 21:58:21 +01:00
[ScribbleHubBridge] Get best-effort information during 403s (#4365)
This commit is contained in:
parent
9126b0f982
commit
d2370320e9
@ -54,44 +54,72 @@ class ScribbleHubBridge extends FeedExpander
|
|||||||
$this->collectExpandableDatas($url);
|
$this->collectExpandableDatas($url);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected $author = '';
|
|
||||||
|
|
||||||
private function collectList($url)
|
private function collectList($url)
|
||||||
{
|
{
|
||||||
$html = getSimpleHTMLDOMCached($url);
|
$html = getSimpleHTMLDOMCached($url);
|
||||||
foreach ($html->find('.search_main_box') as $element) {
|
foreach ($html->find('.search_main_box') as $element) {
|
||||||
$item = [];
|
$item = [];
|
||||||
|
|
||||||
|
$item['author'] = $element->find('[title="Author"]', 0)->plaintext;
|
||||||
|
$item['enclosures'] = [$element->find('.search_img img', 0)->src];
|
||||||
$title = $element->find('.search_title a', 0);
|
$title = $element->find('.search_title a', 0);
|
||||||
$item['title'] = $title->plaintext;
|
$item['title'] = $title->plaintext;
|
||||||
$item['uri'] = $title->href;
|
$item['uri'] = $title->href;
|
||||||
|
$item['uid'] = $item['uri'];
|
||||||
|
|
||||||
$strdate = $element->find('[title="Last Updated"]', 0)->plaintext;
|
$strdate = $element->find('[title="Last Updated"]', 0)->plaintext;
|
||||||
$item['timestamp'] = strtotime($strdate);
|
$item['timestamp'] = strtotime($strdate);
|
||||||
$item['uid'] = $item['uri'];
|
|
||||||
|
|
||||||
$details = getSimpleHTMLDOMCached($item['uri']);
|
foreach ($element->find('.fic_genre') as $tag) {
|
||||||
$item['enclosures'][] = $details->find('.fic_image img', 0)->src;
|
|
||||||
$item['content'] = $details->find('.wi_fic_desc', 0);
|
|
||||||
|
|
||||||
foreach ($details->find('.fic_genre') as $tag) {
|
|
||||||
$item['categories'][] = $tag->plaintext;
|
$item['categories'][] = $tag->plaintext;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Get minimal description in case further requests fail
|
||||||
|
$item['content'] = str_get_html($element->find('.search_body', 0));
|
||||||
|
foreach ($item['content']->firstChild()->children() as $child) {
|
||||||
|
$child->remove();
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
$details = getSimpleHTMLDOMCached($item['uri']);
|
||||||
|
} catch (HttpException $e) {
|
||||||
|
// 403 Forbidden or 429 Too Many Requests: this means we got an anti-bot response
|
||||||
|
if ($e->getCode() === 403 || $e->getCode() === 429) {
|
||||||
|
$this->items[] = $item;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
throw $e;
|
||||||
|
}
|
||||||
|
$item['enclosures'] = [$details->find('.fic_image img', 0)->src];
|
||||||
|
$item['content'] = $details->find('.wi_fic_desc', 0);
|
||||||
|
|
||||||
foreach ($details->find('.stag') as $tag) {
|
foreach ($details->find('.stag') as $tag) {
|
||||||
$item['categories'][] = $tag->plaintext;
|
$item['categories'][] = $tag->plaintext;
|
||||||
}
|
}
|
||||||
|
|
||||||
$read_url = $details->find('.read_buttons a', 0)->href;
|
$read_url = $details->find('.read_buttons a', 0)->href;
|
||||||
$read_html = getSimpleHTMLDOMCached($read_url);
|
$item['comments'] = $read_url . '#comments';
|
||||||
$item['content'] .= '<hr><h3>';
|
try {
|
||||||
|
$read_html = getSimpleHTMLDOMCached($read_url);
|
||||||
|
} catch (HttpException $e) {
|
||||||
|
// 403 Forbidden or 429 Too Many Requests: this means we got an anti-bot response
|
||||||
|
if ($e->getCode() === 403 || $e->getCode() === 429) {
|
||||||
|
$this->items[] = $item;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
throw $e;
|
||||||
|
}
|
||||||
|
$item['content'] .= "<hr><h3><a href=\"$read_url\">";
|
||||||
$item['content'] .= $read_html->find('.chapter-title', 0);
|
$item['content'] .= $read_html->find('.chapter-title', 0);
|
||||||
$item['content'] .= '</h3>';
|
$item['content'] .= '</a></h3>';
|
||||||
$item['content'] .= $read_html->find('#chp_raw', 0);
|
$item['content'] .= $read_html->find('#chp_raw', 0);
|
||||||
|
|
||||||
$this->items[] = $item;
|
$this->items[] = $item;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected $author = '';
|
||||||
|
|
||||||
protected function parseItem(array $item)
|
protected function parseItem(array $item)
|
||||||
{
|
{
|
||||||
//For series, filter out other series from 'All' feed
|
//For series, filter out other series from 'All' feed
|
||||||
|
Loading…
x
Reference in New Issue
Block a user