From d00745c41113894e54f3ff71d73006410e3e9e20 Mon Sep 17 00:00:00 2001 From: Mynacol Date: Thu, 28 Aug 2025 22:10:00 +0000 Subject: [PATCH] [GolemBridge] Add and remove more stuff Fix the inclusion of tables by selecting table elements directly instead of their wrapper div, and also include ul/ol elements. The broader selection pulls in new unwanted elements, which are now explicitly removed (.go-label--sponsored, .go-paywall, .go-index-link, .go-pagination__list). --- bridges/GolemBridge.php | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/bridges/GolemBridge.php b/bridges/GolemBridge.php index d8b18070..e401508a 100644 --- a/bridges/GolemBridge.php +++ b/bridges/GolemBridge.php @@ -139,8 +139,9 @@ class GolemBridge extends FeedExpander // delete known bad elements and unwanted gallery images foreach ( - $article->find('div[id*="adtile"], #job-market, #seminars, iframe, .go-article-header__title, .go-article-header__kicker, - .gbox_affiliate, div.toc, .go-button-bar, .go-alink-list, .go-teaser-block, .go-vh, .go-gallery .[data-active="false"]') as $bad + $article->find('div[id*="adtile"], #job-market, #seminars, iframe, .go-article-header__title, .go-article-header__kicker, .go-label--sponsored, + .gbox_affiliate, div.toc, .go-button-bar, .go-alink-list, .go-teaser-block, .go-vh, .go-paywall, .go-index-link, .go-pagination__list, + .go-gallery .[data-active="false"]') as $bad ) { $bad->remove(); } @@ -165,8 +166,8 @@ class GolemBridge extends FeedExpander } foreach ( - $article->find('div.go-article-header__intro, p, h1, h2, h3, pre, .go-media img[src*="."], .go-media figcaption, - div[class*="golem_tablediv"], iframe, video') as $element + $article->find('div.go-article-header__intro, p, h1, h2, h3, pre, ul, ol, .go-media img[src*="."], .go-media figcaption, + table, iframe, video') as $element ) { if (!str_contains($prevcontent, $element)) { $item .= $element;