1
0
mirror of https://github.com/RSS-Bridge/rss-bridge.git synced 2025-08-30 03:20:30 +02:00

[GolemBridge] Fix image extraction

- Only extract the default "active" image instead of all images in the
  gallery
- Also extract the figure caption again
- Also extract "normal" images that are not part of a gallery

Test website: https://www.golem.de/news/testfahrt-im-supersportwagen-amg-gt-xx-ein-boxenstopp-mit-950-kilowatt-2508-199568.html
This commit is contained in:
Mynacol
2025-08-28 19:55:00 +00:00
parent 3f33d0e312
commit 1f5f7bc4ca

View File

@@ -139,10 +139,10 @@ class GolemBridge extends FeedExpander
}
}
// delete known bad elements
// delete known bad elements and unwanted gallery images
foreach (
$article->find('div[id*="adtile"], #job-market, #seminars, iframe, .go-article-header__title, .go-article-header__kicker,
.gbox_affiliate, div.toc, .go-button-bar, .go-alink-list, .go-teaser-block, .go-vh') as $bad
.gbox_affiliate, div.toc, .go-button-bar, .go-alink-list, .go-teaser-block, .go-vh, .go-gallery .[data-active="false"]') as $bad
) {
$bad->remove();
}
@@ -171,7 +171,10 @@ class GolemBridge extends FeedExpander
}
}
foreach ($article->find('div.go-article-header__intro, p, h1, h2, h3, pre, img[src*="."], div[class*="golem_tablediv"], iframe, video') as $element) {
foreach (
$article->find('div.go-article-header__intro, p, h1, h2, h3, pre, .go-media img[src*="."], .go-media figcaption,
div[class*="golem_tablediv"], iframe, video') as $element
) {
if (!str_contains($prevcontent, $element)) {
$item .= $element;
}