mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2025-01-16 13:50:01 +01:00
[ArsTechnicaBridge] Fix the bridge after redesign (#4282)
This commit is contained in:
parent
cdf21d48e5
commit
f89c75b4b8
@@ -37,39 +37,82 @@ class ArsTechnicaBridge extends FeedExpander
|
||||
{
|
||||
$item_html = getSimpleHTMLDOMCached($item['uri']);
|
||||
$item_html = defaultLinkTo($item_html, self::URI);
|
||||
$item['content'] = $item_html->find('.article-content', 0);
|
||||
|
||||
$content = '';
|
||||
$header = $item_html->find('article header', 0);
|
||||
$leading = $header->find('p[class*=leading]', 0);
|
||||
if ($leading != null) {
|
||||
$content .= '<p>' . $leading->innertext . '</p>';
|
||||
}
|
||||
$intro_image = $header->find('img.intro-image', 0);
|
||||
if ($intro_image != null) {
|
||||
$content .= '<figure>' . $intro_image;
|
||||
|
||||
$image_caption = $header->find('.caption .caption-content', 0);
|
||||
if ($image_caption != null) {
|
||||
$content .= '<figcaption>' . $image_caption->innertext . '</figcaption>';
|
||||
}
|
||||
$content .= '</figure>';
|
||||
}
|
||||
|
||||
foreach ($item_html->find('.post-content') as $content_tag) {
|
||||
$content .= $content_tag->innertext;
|
||||
}
|
||||
|
||||
$item['content'] = str_get_html($content);
|
||||
|
||||
$parsely = $item_html->find('[name="parsely-page"]', 0);
|
||||
$parsely_json = json_decode(html_entity_decode($parsely->content), true);
|
||||
$item['categories'] = $parsely_json['tags'];
|
||||
|
||||
$pages = $item_html->find('nav.page-numbers > .numbers > a', -2);
|
||||
if (null !== $pages) {
|
||||
for ($i = 2; $i <= $pages->innertext; $i++) {
|
||||
$page_url = $item['uri'] . '&page=' . $i;
|
||||
$page_html = getSimpleHTMLDOMCached($page_url);
|
||||
$page_html = defaultLinkTo($page_html, self::URI);
|
||||
$item['content'] .= $page_html->find('.article-content', 0);
|
||||
// Some lightboxes are nested in figures. I'd guess that's a
|
||||
// bug in the website
|
||||
foreach ($item['content']->find('figure div div.ars-lightbox') as $weird_lightbox) {
|
||||
$weird_lightbox->parent->parent->outertext = $weird_lightbox;
|
||||
}
|
||||
|
||||
// It's easier to reconstruct the whole thing than remove
|
||||
// duplicate reactive tags
|
||||
foreach ($item['content']->find('.ars-lightbox') as $lightbox) {
|
||||
$lightbox_content = '';
|
||||
foreach ($lightbox->find('.ars-lightbox-item') as $lightbox_item) {
|
||||
$img = $lightbox_item->find('img', 0);
|
||||
if ($img != null) {
|
||||
$lightbox_content .= '<figure>' . $img;
|
||||
$caption = $lightbox_item->find('div.pswp-caption-content', 0);
|
||||
if ($caption != null) {
|
||||
$credit = $lightbox_item->find('div.ars-gallery-caption-credit', 0);
|
||||
if ($credit != null) {
|
||||
$credit->innertext = 'Credit: ' . $credit->innertext;
|
||||
}
|
||||
$lightbox_content .= '<figcaption>' . $caption->innertext . '</figcaption>';
|
||||
}
|
||||
$lightbox_content .= '</figure>';
|
||||
}
|
||||
}
|
||||
$item['content'] = str_get_html($item['content']);
|
||||
$lightbox->innertext = $lightbox_content;
|
||||
}
|
||||
|
||||
// remove various ars advertising
|
||||
$item['content']->find('#social-left', 0)->remove();
|
||||
foreach ($item['content']->find('.ars-component-buy-box') as $ad) {
|
||||
foreach ($item['content']->find('.ars-interlude-container') as $ad) {
|
||||
$ad->remove();
|
||||
}
|
||||
foreach ($item['content']->find('.ad_wrapper') as $ad) {
|
||||
$ad->remove();
|
||||
foreach ($item['content']->find('.toc-container') as $toc) {
|
||||
$toc->remove();
|
||||
}
|
||||
foreach ($item['content']->find('.sidebar') as $ad) {
|
||||
$ad->remove();
|
||||
|
||||
// Mostly YouTube videos
|
||||
$iframes = $item['content']->find('iframe');
|
||||
foreach ($iframes as $iframe) {
|
||||
$iframe->outertext = '<a href="' . $iframe->src . '">' . $iframe->src . '</a>';
|
||||
}
|
||||
// Drop the inline aspect-ratio style to fix the padding around the former iframes and actual inline videos
|
||||
foreach ($item['content']->find('div[style*=aspect-ratio]') as $styled) {
|
||||
$styled->removeAttribute('style');
|
||||
}
|
||||
|
||||
$item['content'] = backgroundToImg($item['content']);
|
||||
|
||||
$item['uid'] = explode('=', $item['uri'])[1];
|
||||
|
||||
$item['uid'] = strval($parsely_json['post_id']);
|
||||
return $item;
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user