1
0
mirror of https://github.com/RSS-Bridge/rss-bridge.git synced 2025-08-16 05:24:08 +02:00

Compare commits

...

256 Commits

Author SHA1 Message Date
Dag
152e96d3d0 fix: broken if_not_modified_since (#4377) 2024-12-30 00:19:18 +01:00
Michael Vincent
f0db6a22d1 [WirecutterDealsBridge] Add bridge (#4359) 2024-12-12 17:52:41 +01:00
July
8234906127 [EpicGamesFreeBridge] Add new bridge (#4366) 2024-12-12 17:50:43 +01:00
July
d2370320e9 [ScribbleHubBridge] Get best-effort information during 403s (#4365) 2024-12-12 05:43:17 +01:00
July
9126b0f982 [CubariProxyBridge] Fix favicon properly (#4364) 2024-12-12 05:41:46 +01:00
Florent V.
4685bbdffd [EdfPricesBridge] fixing bridge (#4360)
* [EdfPricesBridge] add new bridge

* [EdfPricesBridge] bad refactor

* [EdfPricesBridge] support php 7.4

* [EdfPrices Bridge] fix errors

---------

Co-authored-by: Florent VIOLLEAU <florent.violleau@samsic.fr>
2024-12-08 19:48:44 +01:00
Pavel Korytov
bf4a918e60 [MistralAIBridge] Add Mistral (#4356) 2024-12-05 17:30:21 +01:00
okbaydere
17d142c038 Add StorytelBridge for Storytel list fetching (#4355)
* Add StorytelBridge for fetching Storytel lists

* Updated StorytelBridge to include URL validation and cleaned up code
2024-12-04 18:54:24 +01:00
Predä
59d77d4576 [TikTokBridge] Include author profile picture (#4354) 2024-12-04 17:34:35 +01:00
Pavel Korytov
d956471d42 [QwenBlogBridge] Add bridge (#4353) 2024-12-02 16:46:13 +01:00
Dag
6a81fc0f51 fix(file_cache): if write failure, produce log record instead of exception (#4352) 2024-11-28 03:50:56 +01:00
July
88ccc6067c [CubariProxyBridge] Fix favicon (#4347) 2024-11-26 15:54:30 +01:00
Dawid Wróbel
c7f9870ba7 [OLXBridge] fix title and shipping info retrieval (#4346) 2024-11-26 03:04:02 +01:00
tillcash
c651e11b0f [MaalaimalarBridge] fix new url (#4344) 2024-11-25 19:03:35 +01:00
thomas-333
b42a993176 [Bluesky] New bridge (#4341)
* Create BlueskyProfileBridge.php

Bridge for Bluesky

* Update BlueskyProfileBridge.php

Attempt to fix test error

* Rename BlueskyProfileBridge.php to BlueskyBridge.php and add list of select data source

* Update BlueskyBridge.php to pass lint checks
2024-11-25 19:01:37 +01:00
SebLaus
ec6f98e3c2 Added Alternate way to get Price if no buttons available (#4342) 2024-11-24 18:11:57 +01:00
Sebastian Wolf
74496e23aa [MixologyBridge] add new bridge (#4331)
* [MixologyBridge] add new bridge

* [MixologyBridge] change invalid item property tags to categories

* [MixologyBridge] rewrite into FeedExpander

* [MixologyBridge] fix code formatting
2024-11-24 18:09:59 +01:00
User123698745
83bc3fd762 [DRKBlutspendeBridge] add new bridge (#4324)
* [DRKBlutspendeBridge] add new bridge

* [DRKBlutspendeBridge] move explode_lines into DRKBlutspendeBridge class
2024-11-24 03:57:28 +01:00
Dag
628b30208a fix: don't acquire exclusive locks (#4340)
Due to bugs in logging/error-handling there sometimes are deadlocks
2024-11-23 22:28:50 +01:00
Sebastian Wolf
e3260ff529 [NordbayernBridge] fill item categories if available (#4338) 2024-11-23 19:19:20 +01:00
Matt Connell
086ef7f8a7 feat: add WKYT bridge (#4337) 2024-11-23 19:12:36 +01:00
sysadminstory
2ee615e588 [PepperBridgeAbstract, DealabsBridge, HotUKDealsBridge, MydealsBridge] Streamlining Group Management (#4336)
* [PepperBridgeAbstract, DealabsBridge, HotUKDealsBridge, MydealsBridge] Streamlining Group Management

Since groups can change URLs, be created, or removed at the discretion
of website administrators, maintaining a valid and functional list of
groups is impractical.

Users can now enter the part of the URL that defines the group in a text
field, rather than searching through a lengthy, likely outdated list.

The way the RSS feed title is retrieved had to be adjusted accordingly.

Titles are now cached for 15 days to avoid unnecessary website access
and to prevent potential bot blocking.

Existing feeds will continue to work, as their parameters remain
unchanged; only the method for inputting them has been modified.

* [PepperBridgeAbstract, DealabsBridge, HotUKDealsBridge, MydealsBridge] Streamlining Group Management

Coding policy fixes

* [PepperBridgeAbstract, DealabsBridge, HotUKDealsBridge, MydealsBridge] Streamlining Group Management

Fix wrong comment

* [PepperBridgeAbstract, DealabsBridge, HotUKDealsBridge, MydealsBridge] Streamlining Group Management

Add Example values for Group context
2024-11-23 19:11:36 +01:00
Sebastian Wolf
a6e8760726 [FragDenStaatBridge] add new bridge (#4330) 2024-11-23 18:54:21 +01:00
July
9457e075f6 [PriviblurBridge] Fix invalid favicon, use either Tumblr or blog icon (#4327) 2024-11-23 18:50:40 +01:00
July
2294dac3f1 [AO3Bridge] Add fetch limit to reduce requests (#4328) 2024-11-23 18:47:08 +01:00
sysadminstory
6c86e2c1f7 [IdealoBridge] Really fix Logic and enhance Feed Content (#4321)
- Fix Feed Title generation (wrong usage of loadCacheValue)
- Use a more reliable way to get New and Used Price
- If no new Price and no Used Price are present in the page, then don't
  delete previous New Price and previous Used Price
- If there is no New Price and no Used Price, then return no Feed
  Item
- Fix the "now" date format
- Make the Feed Item Title more readable
- Use the Product Link as the Feed URL
2024-11-08 08:11:18 +01:00
Dennis
dd165ea9d1 [HuntShowdownNewsBridge] Fetches the latest articles from Hunt Showdown (#4318)
* feat: add Hunt Showdown News Bridge for fetching latest news articles

* chore: clean up formatting and remove unnecessary whitespace in HuntShowdownNewsBridge.php
2024-11-04 15:16:58 +01:00
Rose Liverman
1cd5b072f3 Formatting fix "For Hosts" documentation (#4317) 2024-11-03 18:33:05 +01:00
Tuğhan Belbek
8d6d0fa10c [DuvarOrgBridge] Add Duvar.org bridge for scraping news articles (#4315)
* Add Duvar.org bridge for scraping news articles

* PR Fixes

* Update DuvarOrgBridge.php to set a default value for the URL suffix

---------

Co-authored-by: Tughan Belbek <Tughan.Belbek@t-hive.io>
2024-11-03 18:30:28 +01:00
sysadminstory
bd0fb1da99 [IdealoBridge] Fix (#4316)
When a product was available before as used product in the past, and
now it's not available used anymore, a price update article was
generated on every feed loading, because the old used price was still
stored in the cache, and therefore different as "no price".

The issue was also present in the case of a New product price that
becomes unavailable.

Now, when either there is no New or Used price available, the previous
price is deleted from the cache.
2024-11-03 18:28:32 +01:00
Alexander Sulfrian
29d984cbe7 [TagesspiegelBridge] Add bridge for tagesspiegel.de (#4270)
* [TagesspiegelBridge] Add bridge for tagesspiegel.de

* [TagesspiegelBridge] Raise timeout to 60min
2024-11-03 18:25:51 +01:00
Arnav Jain
082542dabc [TestFaktaBridge] new bridge (#4307)
* [TestFaktaBridge] new bridge

* [TestFaktaBridge] fix linting errors
2024-11-03 18:22:44 +01:00
Arnav Jain
bc536f3928 [DäcksnackBridge] New Bridge (#4309)
* [DäcksnackBridge] new bridge

* [DäcksnackBridge] move preamble before figure
2024-11-03 18:20:48 +01:00
Matthieu Rakotojaona
c3dc46a307 [prtester] Update python dependency (#4311)
This is necessary for glob.glob() with the root_dir argument
2024-10-20 19:16:29 +02:00
User123698745
6c88f2c21e [prtester] fix prtester no longer supporting multiple bridges being changed, because the filenames are not unique (#4310) 2024-10-20 00:18:52 +02:00
Arnav Jain
668f3a9d7e [AppleMusicBridge] fix linting error (#4308) 2024-10-19 20:19:24 +02:00
somini
b9eb3c887a [PCGWNewsBridge] Remove bridge (#4305)
Fix #4291
2024-10-18 08:31:08 +02:00
Jonas Taedcke
f9a51b6768 [AppleMusicBridge] Further data request to receive artist information. (#4271) 2024-10-18 08:29:07 +02:00
Tuğhan Belbek
51cdb66f9c [HarvardBusinessReviewBridge] Add bridge (#4293) 2024-10-17 14:17:48 +02:00
Tostiman
bd88bc27d3 [TheDrive] New bridge (#4304) 2024-10-17 14:14:51 +02:00
Alexander Sulfrian
56994b3b5c [ZeitBridge] Remove content from original feed (#4260)
The original feed contains a small version of the header image and
the summary or a literal "None". The header image is already added, but
the original content was kept. This removes the original content and
adds the summary if it exists.
2024-10-17 08:47:44 +02:00
Bocki
664436c5f4 [prtester] Optimize tester workflow (#4303) 2024-10-17 01:25:07 +02:00
Bocki
70cf917f09 [ForensicArchitecture] Create ForensicArchitectureBridge.php (#4301) 2024-10-17 00:09:35 +02:00
Bocki
776e27218a [maint] fix phpunit test (#4300) 2024-10-17 00:00:52 +02:00
Bocki
e5e2059ed7 [maint] Update all workflow action versions (#4298) 2024-10-16 19:46:56 +02:00
Bocki
e7d6f89887 [ForensicArchitecture] Remove for bugfixing (#4297) 2024-10-16 19:21:24 +02:00
somini
0c96a47e8c Remove PanacheDigitalGamesBridge (#4277)
The Blog has a feed now:

https://panachedigitalgames.com/en/feed/
2024-10-16 19:14:06 +02:00
tillcash
5d83050673 [ForensicArchitectureBridge] Add Bridge (#4280) 2024-10-16 19:13:00 +02:00
vlnst
bd823100cd [maint] Update instance location (#4279) 2024-10-16 19:04:26 +02:00
Pavel Korytov
f89c75b4b8 [ArsTechnicaBridge] Fix the bridge after redesign (#4282) 2024-10-16 18:59:36 +02:00
Eugene Molotov
cdf21d48e5 [RutubeBridge] Multiple fixes (#4284) 2024-10-16 18:58:18 +02:00
Tostiman
3a5de759fa [CarThrottleBridge] update for new layout (#4285) 2024-10-16 18:57:44 +02:00
Tostiman
eb21e97d01 [OvertakeBridge] Renamed RaceDepartmentBridge to OvertakeBridge (#4294) 2024-10-16 18:37:30 +02:00
tillcash
6aba9fdf54 [MaalaimalarBridge] fix url (#4295) 2024-10-16 18:35:06 +02:00
Bocki
63c16e470d [prtester] Rework test storage (#4292)
* Update prtester.py

* Update prhtmlgenerator.yml
2024-10-16 15:36:57 +02:00
Mynacol
af26d845d9 Include all bridges in tarballs
Currently, two "demo" and "example" bridges are excluded from GitHub's
autogenerated tarballs. As I argued, those files can still be helpful
for integration tests, as they are run in NixOS and don't need internet
access or depend on the availability of external services [1].

Additionally, the official docker image builds from the checkout so it
includes those bridges when users use containers or a git checkout
compared to tarballs. This commit therefore unifies the list of
available bridges between deployment methods.

[1] https://github.com/NixOS/nixpkgs/blob/master/nixos/tests/web-apps/rss-bridge.nix#L20
2024-10-09 18:10:52 +02:00
osvfj
80c43f10d8 [TCBScansBridge] Add bridge (#4263) 2024-09-12 11:07:22 +02:00
sysadminstory
d9316cdc60 [PicukiBridge] Try to fix the bridge (#4262)
This is a try to fix the bridge HTML parsing
2024-09-11 15:14:19 +02:00
tillcash
40041dd65f [DailythanthiBridge] fix url (#4261) 2024-09-09 19:06:08 +02:00
Pavel Korytov
358bebbb89 [EconomistWorldInBriefBridge] Fix bridge (#4258) 2024-09-07 05:02:27 +02:00
Dag
293d04f296 fix(spotify): detect rate limiting (#4253) 2024-09-03 07:02:37 +02:00
July
3dc8b65a0b [GovTrackBridge] Add feed for GovTrack events and blog (#4231)
* [GovTrackBridge] Add feed for GovTrack events and blog

* [GovTrackBridge] add missing default value

* [GovTrackBridge] leaner items array and limit implementation
2024-09-02 21:49:49 +02:00
Dag
486191b419 fix(cve_details) (#4251) 2024-09-02 21:43:40 +02:00
Dag
a6bdc322b0 refactor: extract exception and cache middleware (#4248) 2024-09-01 21:48:14 +02:00
bloominstrong
36fd72c87e [ABCNewsBridge] Fix broken due to site redesign (#4247) 2024-08-31 16:27:45 +02:00
Dag
9cabf60144 docs
* refactor

* docs
2024-08-30 04:37:40 +02:00
Dag
6a24e53d6c refactor (#4244) 2024-08-30 04:21:51 +02:00
Dag
bb2f471a03 fix: bug in prior fix (#4243)
Have to tweak the config BEFORE instantiating of course
2024-08-30 02:44:50 +02:00
Dag
3e1a8b29d9 fix: extract duplicate config loading (#4242)
Also fix a problem with bin/cache-prune and FileCache and its enable_purge option
2024-08-30 02:29:51 +02:00
Dag
9f48370eb0 fix: tweak caching logic (#4241) 2024-08-30 00:22:11 +02:00
Dag
39952c2d95 refactor: implement middleware chain (#4240)
* refactor: implement middleware chain

* refactor
2024-08-30 00:07:58 +02:00
Dag
e7ae06dcf0 fix: bug in prior refactor (#4239) 2024-08-29 23:02:01 +02:00
Dag
58544cd61a refactor: introduce DI container (#4238)
* refactor: introduce DI container

* add bin/test
2024-08-29 22:48:59 +02:00
tillcash
e010fd4d52 [HinduTamilBridge] fix image (#4237) 2024-08-28 19:45:54 +02:00
Petr Kolář
d51cc8f1a7 Fixed path in CeskaTelevizeBridge (#4236) 2024-08-28 19:43:40 +02:00
Dag
6516e31c1b refactor: format rendering (#4229) 2024-08-23 17:34:06 +02:00
Dag
c849576c93 fix(rumble): fix guid bug (#4232)
Remove tracking parameter in query to avoid feed readers to interpret these as new items
2024-08-23 17:09:17 +02:00
Clemens Neubauer
b0674d7b19 [BMDSystemhausBlogBridge] rework detectParameters (#4138)
* bridge BMDSystemhausBlog: rework of detectParameters

* fix lint phpcs error

* Update BMDSystemhausBlogBridge.php

* Update BMDSystemhausBlogBridge.php
2024-08-22 11:36:58 +02:00
Dag
05e2c350b7 refactor: less reliance on super globals (#4228) 2024-08-22 00:33:35 +02:00
July
4a3919c1a3 [NPRBridge] Add missing tag and remove extra HTML elements (#4227) 2024-08-21 23:05:29 +02:00
July
06a8896000 [PriviblurBridge] Add Priviblur (Tumblr frontend) bridge (#4221)
* [PriviblurBridge] Add Priviblur (Tumblr frontend) bridge

* [PriviblurBridge] prevent error if post has no tags
2024-08-21 22:58:26 +02:00
July
d379f3e575 [CubariProxyBridge] add bridge for cubari manga proxies (#4220)
* [CubariProxyBridge] add bridge for cubari manga proxies

* [CubariProxyBridge] add limit and use isset
2024-08-21 22:57:02 +02:00
July
3a327503ee [NPRBridge] add bridge for NPR stories (#4225)
* [NPRBridge] add bridge for NPR stories

* [NPRBridge] Use better selectors for multiple items
2024-08-21 22:10:03 +02:00
tillcash
2d5d2f5017 [NvidiaDriverBridge] fix typo (#4224) 2024-08-20 17:32:15 +02:00
tillcash
320afc3f32 [MaalaimalarBridge] fix image (#4222)
* [NvidiaDriverBridge] Added Windows support

* Update NvidiaDriverBridge.php

* Update NvidiaDriverBridge.php

* [MaalaimalarBridge] fix image

* [MaalaimalarBridge] fix lint
2024-08-19 19:17:42 +02:00
Dag
c0e37bcf35 refactor: frontpage and proxy setting (#4214) 2024-08-18 19:11:11 +02:00
Tobias Alexander Franke
e9d3a657ba [EASeedBridge] New bridge for the EA Seed blog (#4216)
* [EASeedBridge] New bridge for the EA Seed blog

* Fix linter issues
2024-08-15 00:47:39 +02:00
Tobias Alexander Franke
307c22204d [ActivisionResearchBridge] New bridge for the Activision Research blog (#4213)
* [ActivisionResearchBridge] New bridge for the Activision Research blog

* [ActivisionResearchBridge] Fix linting issues
2024-08-11 23:20:20 +02:00
Dag
4424ea54e9 chore: increase linter speed (#4211) 2024-08-11 02:31:50 +02:00
Dag
133dbf87c5 fix(telegram): add note if content is omitted from preview page (#4210)
* fix(telegram): add note if content is omitted from preview page

* lint
2024-08-11 01:23:10 +02:00
July
2e6e246759 [KemonoBridge] attempt to fix malformed tag responses (#4209) 2024-08-10 23:11:43 +02:00
Mynacol
129b8a3a5a [ModifyBridge] New bridge to modify feeds (#4164)
* [ModifyBridge] New bridge to modify feeds

Create a general bridge that can modify the common fields of feeds
with regular expressions.

* [ModifyBridge] Also modify <enclosure> element

Additionally to the list of <enclosures>.
2024-08-10 23:10:37 +02:00
July
4ef5ca50c6 [KemonoBridge] Add KemonoBridge (#4192)
* [KemonoBridge] Add KemonoBridge

* refactor

* [KemonoBridge] fix categories in cases where it's a proper json array

---------

Co-authored-by: Dag <me@dvikan.no>
2024-08-10 17:36:58 +02:00
Tone
adcc8e371d [TarnkappeBridge] changed "unwanted stuff" (#4206)
* [TarnkappeBridge] changed "unwanted stuff"

em was removed because of the annoying affiliate info, but that also deleted the text from blockquotes.

The p-element with the affiliate info has no attributes like class, but it is the only p-element with a style-attribute, so I used this to identify it.

* Update TarnkappeBridge.php

removed whitespace

* Update TarnkappeBridge.php

don't know why I did it twice before
2024-08-09 15:20:10 +02:00
Dag
f358f1abec refactor: loadCacheValue/saveCacheValue (#4205) 2024-08-08 17:47:04 +02:00
Dag
2acd415475 refactor: drop usage of Debug::log (#4202)
* refactor: drop usage of Debug::log

* lint
2024-08-08 04:31:47 +02:00
Dag
6afd13eb06 refactor: deprecate FeedItem constructor (#4201)
* fix: bug in prior commit

* refactor: deprecate FeedItem constructor

* test: fix
2024-08-08 03:43:26 +02:00
Dag
2a96bf19b5 fix: bug in prior commit (#4200) 2024-08-08 02:55:35 +02:00
Dag
9973f731df feat: introduce RateLimitException (#4199) 2024-08-08 02:13:04 +02:00
tillcash
7073bb2f46 [NVIDIADriverBridge] Initial Commit (#4198)
* [NVIDIADriverBridge] Initial Commit

Fetch the latest NVIDIA Linux driver updates

* Update NVIDIADriverBridge.php

* refactor

* rename

---------

Co-authored-by: Dag <me@dvikan.no>
2024-08-08 01:35:48 +02:00
Quentin B.
db85015daa [AnfrBridge] Add bridge (#4191)
* [AnfrBridge] Add bridge

* yup

---------

Co-authored-by: Dag <me@dvikan.no>
2024-08-08 01:20:42 +02:00
Quentin B.
8c4385e61d [BodaccBridge] Add bridge (#4190)
* [BodaccBridge] Add bridge

* [BodaccBridge] Fix bridge

* [BodaccBridge] Fix API url

* fix

---------

Co-authored-by: Dag <me@dvikan.no>
2024-08-08 01:09:13 +02:00
Quentin B.
829d570f8e [CentreFranceBridge] Add bridge (#4189)
* [CentreFranceBridge] Add bridge

* [CentreFranceBridge] Fix bridge

* [CentreFranceBridge] Fix bridge

* [CentreFranceBridge] Improved icon choice

* [CentreFranceBridge] Fetch additional data from articles

* [CentreFranceBridge] New parameter to allow client to control how many articles to fetch

* [CentreFranceBridge] Improve bridge name based on existing parameters

* [CentreFranceBridge] Fixed some edge cases

* refactor: reorder

* fix

---------

Co-authored-by: Dag <me@dvikan.no>
2024-08-08 00:57:40 +02:00
Pavel Korytov
b25a779d98 [TldrTechBridge] Fix bridge (#4187)
* [TldrTechBridge] Fix bridge

* yup

---------

Co-authored-by: Dag <me@dvikan.no>
2024-08-08 00:27:33 +02:00
Christian Schabesberger
ee54cf4576 add NurembergerNachrichten bridge (#4185)
* add NurembergerNachrichten bridge

apply suggested changes and fix regions

put collectData on top

replace self:: with -> for methodcalls

* refactor: remove unused var

* refactor: order methods

* fix

---------

Co-authored-by: Dag <me@dvikan.no>
2024-08-08 00:00:26 +02:00
Dag
9215b95779 fix: bug in prior refactor (#4197) 2024-08-07 18:56:27 +02:00
Dag
c11bc184ca fix: restore php error_log writing (#4196) 2024-08-07 18:09:44 +02:00
Christian Schabesberger
313be4c512 replace self:: with -> for methodcalls in Nordbayern bridge (#4195) 2024-08-07 15:51:44 +02:00
Dag
4faaa79101 refactor: change the way dependencies are wired (#4194)
* refactor: change the way dependencies are setup

* lint
2024-08-07 03:15:43 +02:00
Dag
6ec9193546 yuop (#4193) 2024-08-07 00:21:06 +02:00
Eugene Molotov
401cc187b7 [RutubeBridge] Fix playlist mode returning empty result (#4184) 2024-08-02 17:44:46 +02:00
Dag
0051e0fcdd docs: improve docker docs (#4183)
* docs: improve docker docs

* fix: cleanup and remove duplicate docker instructions
2024-08-01 23:36:14 +02:00
Tone
d050fe9a9b [AnisearchBridge] fixed typo (#4182)
don't know why it was there
2024-08-01 12:36:26 +02:00
Dag
8ae716e75c fix: improve github issue template (#4181) 2024-07-31 21:57:33 +02:00
Pavel Korytov
b505667168 [SubstackBridge] Add Substack bridge (#4174)
* [SubstackBridge] Add Substack

* [SubstackBridge] Add docs

* [SubstackBridge] Fix lint

* [SubstackBridge] Update description

* [SubstackBridge] Update description (x2)
2024-07-31 21:57:20 +02:00
Dag
615c533587 fix(FeedParser): dont emit content module (#4180) 2024-07-31 20:34:33 +02:00
Dag
8a1f2604aa fix: bug in prior refactor (#4179)
* fix: bug in prior refactor

* fix deprecation notice
2024-07-31 19:25:51 +02:00
Dag
b8a9f34527 fix(FeedParser): scrape out content from rss content:encoded (#4178)
* fix(FeedParser): parse content module from rss2

* refactor
2024-07-31 19:04:07 +02:00
Dag
e55e9b8fac feat: enable all bridges by default (#4177) 2024-07-31 17:53:10 +02:00
Dag
9982bfce1f fix: convert php errors to exceptions when in debug mode (#4176) 2024-07-31 17:51:44 +02:00
Zack Puhl
1a8d0fb8ab [EBayBridge] fix undefined vars errors (#4175) 2024-07-31 17:51:05 +02:00
Dag
891c8979a3 refactor: return proper response object (#4169) 2024-07-31 17:30:06 +02:00
Pavel Korytov
aa3989873c [EconomistBridge] Add cookie (#4173)
* [EconomistBridge] Add cookie

* [EconomistBridge] Fix lint
2024-07-30 22:10:57 +02:00
MarKoeh
cb91afbd71 [ARDMediathekBridge] fixing API URL, start using show title (#4170) (#4172)
The bridge stopped working after the API server stopped accepting a trailing slash after the ID in the URL. This is being fixed. Also, the show title in the JSON was ignored. This is being fixed as well
2024-07-30 22:08:18 +02:00
Zack Puhl
22b39e3fcd [EBayBridge] Repair & Augment the eBay Feed (#4157)
* [EBayBridge]: discount details; fix DOM parsing

* [EBayBridge] Ending slash. No "www.ebay.commyhijack.net", for example.

* [EBayBridge] Trim discountLine details when set.

* [EBayBridge] Refactor and update content

* shameless self-addition to CONTRIBUTORS.md

* [EBayBridge] Toggle original search links w/ checkbox

* [EBayBridge] oops: fix introduced XSS vuln

* [EBayBridge] Fix linting error: use array_column

* [EBayBridge] fix compat with <php8
2024-07-29 17:53:39 +02:00
Zack Puhl
6d81d6d306 [RumbleBridge] Facelift, Validation, & Livestreams (#4160)
* [RumbleBridge] Facelift+media types (livestreams)

* [RumbleBridge] Remove 'required' from list input.

* [RumbleBridge] lint
2024-07-29 17:53:14 +02:00
Dag
955fb6f315 fix(reddit): increase default cache ttl (#4168) 2024-07-29 00:18:28 +02:00
Christian Schabesberger
8dd56bca05 fix bulletpoints for nordbayern (#4166) 2024-07-28 22:42:18 +02:00
Pavel Korytov
f773878459 [EconomistWorldInBriefBridge] Add cookie to options (#4165)
* [EconomistWorldInBriefBridge] Add cookie

* [EconomistWorldInBriefBridge] Add docs

* [EconomistWorldInBriefBridge] Best-effort to work without cookie
2024-07-28 22:41:08 +02:00
Eugene Molotov
d28a0fd94b [Vk2Bridge] Handling albums (#4163) 2024-07-28 22:34:12 +02:00
Eugene Molotov
bba225dfe8 [RutubeBridge] New option to fetch video from search results (#4162) 2024-07-28 22:33:48 +02:00
Tone
a1b3e596fc [AnisearchBridge.php] fixed youtube link (#4159)
$trailer->{'data-xsrc'} wasn't read correctly in EOT context
2024-07-28 22:21:14 +02:00
enwuenwu
2fcba49433 [Mailman2Bridge] fix message separation and improve "From_ lines" disambiguation (#4156)
* [Mailman2Bridge.php] enable PCRE_MULTILINE pattern modifier

Enable PCRE_MULTILINE pattern modifier on mbox content parsing. Without it parsing monthly archives results in only a single message each.

* [Mailman2Bridge.php] extend mbox "From_ lines" pattern

Extend PCRE pattern matching individual "From_ lines" used to split single messages in mbox content. 

In addition to the matching line having to start with 'From ' it now also has to end with time and date (hh:mm:ss yyyy). 

This makes the pattern slightly more robust against accidental matches when a line within the actual message body starts with 'From ' which Mailman 2 (Pipermail) may not be configured to disambiguate.

* [Mailman2Bridge.php] remove trailing slash from URI constant

---------

Co-authored-by: enwu <108224417+8279279374@users.noreply.github.com>
2024-07-28 22:11:48 +02:00
Tostiman
049af3cef7 [HardwareInfoBridge] delete bridge for discontinued website (#4124) 2024-07-28 22:03:35 +02:00
Dmitry R.
376e711f03 [NovayaGazetaEuropeBridge]: fix warnings (#4154) 2024-07-28 22:02:47 +02:00
tillcash
00d5242871 [GithubTrendingBridge] Add support for spoken languages (#4149)
* [GithubTrendingBridge] Add support for spoken languages

* Update GithubTrendingBridge.php
2024-07-28 22:00:36 +02:00
ORelio
f7ddbcd733 [GBAtemp] Fix title extraction (#4151)
Fix title extraction for news and reviews
2024-07-28 21:58:08 +02:00
tillcash
da8cfdf179 [HinduTamilBridge] refactor (#4146)
* [HinduTamilBridge] refactor

* [HinduTamilBridge] fixed lint

* [HinduTamilBridge] fixed lint 2

* Update HinduTamilBridge.php
2024-07-05 22:39:47 +02:00
Tone
4539eb69aa [GolemBridge] fix youtube links (#4144) 2024-07-04 20:53:49 +02:00
Niehztog
8bf1537054 delete obsolete bridge (#4143) 2024-07-04 20:53:16 +02:00
tillcash
d0c35146dd [HinduTamilBridge] Fix timestamp again (#4142) 2024-06-28 20:51:59 +02:00
Thomas
adad9d6405 [YouTubeCommunityTabBridge] Improve JSON extraction (#4140)
Small change that should make the extraction of JSON from HTML work more
reliably
2024-06-24 22:32:03 +02:00
July
2a84350cb2 [HumbleBundleBridge] Create new bridge (#4139)
* [HumbleBundleBridge] Create new bridge

* [HumbleBundleBridge] Use less redundant bundle type handling
2024-06-21 15:47:34 +02:00
Dag
d60f0b0e74 feat(FilterBridge): custom feed name parameter (#4136)
fix #4100
2024-06-18 21:12:29 +02:00
Dag
00074b9bfc fix: dont remove www from anchors in DOM, fix #4114 (#4135) 2024-06-18 20:55:05 +02:00
Dag
206bebc7bd ci: disallow the sizeof function in linter (#4134) 2024-06-18 20:22:46 +02:00
Mynacol
0eac7a0784 [HeiseBridge] Remove lost+found icon
Remove the icon visible in l+f articles, e.g.
https://www.heise.de/news/l-f-DISGOMOJI-die-Linux-Malware-die-auf-Emojis-steht-9765024.html

Using a css selector in the form img[alt*="l+f"] was tried, but is not
supported by the used library.
2024-06-16 13:23:36 +02:00
Ftonans
649dfa7292 Update instance list (#4131)
vern's instance seems to be working, I changed the url to https since they have automatic redirect.

I removed trailing slashes from the urls so they look the same.

I removed [rss.m3wz.su](https://rss.m3wz.su) since I didn't see the website online and the owner last posted on Fediverse two months ago. I'm not sure maybe it should be in "Inactive" category, I can try to contact m3wz for information about his instance.

I removed rss.foxhaven.cyou because of [this](https://shitpost.poridge.club/notes/9lumb2gll8) (TL;DR the owner lost access to the domain)

bus-hit is offline but the main website is working. I guess the rss-bridge just crashed and the owner will restart it.
2024-06-13 20:11:02 +02:00
sysadminstory
bb1e308057 [IdealoBridge] Fix price comparison and some PHP Notice (#4130)
* [IdealoBridge] Fix price comparison and some PHP Notice

- The prices were compared as strings, so the comparison was wrong in
  some cases: now the prices are converted to float before the
  comparison, so the logic actually works.

- Don't show a new or used product price if it does not exist : this
  prevents a PHP Notice to be thrown

* [IdealoBridge] Fix price conversion in case the price is null

The conversion of the text price to float won't work if the price is
null: we return null in this case now.
2024-06-13 05:03:20 +02:00
July
e1b74aeb1b [GameBananaBridge] Add categories and more detailed updates (#4129)
* [GameBananaBridge] Add mod categorie(s)

* [GameBananaBridge] Include full update changelog details
2024-06-13 05:02:17 +02:00
tillcash
d3d33c72bd [HinduTamilBridge] fix timestamp (#4127) 2024-06-11 15:40:49 +02:00
Tone
87fa6ea71e [HeiseBridge.php] Prevent Youtube videos from being filtered out (#4125) 2024-06-10 19:40:07 +02:00
Tim-Florian Feulner
36706a3dec Fix NACSouthGermanyMediaLibraryBridge due to website changes (#4121) 2024-06-03 00:55:39 +02:00
tillcash
cfd406861e [HarvardHealthBlogBridge] Update (#4117)
Make article image optional as all images are representative
2024-05-30 16:08:08 +02:00
tillcash
bd90109c70 [HarvardHealthBlogBridge] New (#4116) 2024-05-29 21:16:10 +02:00
tillcash
5a68ee0c87 [HinduTamilBridge] New (#4115) 2024-05-26 17:21:14 +02:00
Albert Kiskorov
dc199ebf5c Fix: Ensure $time is set from innertext when datetime attribute is not found (#4111)
This commit addresses a bug where the $time variable is not set from the innertext of the $time_element when the datetime attribute is not found. The previous implementation only checked if $time was null or an empty string, which did not cover all cases where the datetime attribute might be missing. By using the empty() function, we ensure that $time is correctly set from the innertext when the datetime attribute is not present.
2024-05-19 14:37:59 +02:00
Mynacol
75f35391fa [HeiseBridge] Add missing <ol> elements (#4110)
The following article has <ol> elements that were missing.
Adding them to have the full content.

https://heise.de/-9714438
2024-05-18 16:51:00 +02:00
Mynacol
7bde7a56f9 [ZeitBridge] Fix linting 2024-05-18 16:35:24 +02:00
Mynacol
4d12aa2a9e [ZeitBridge] Remove annoyances, add content
Remove navigational elements, podcast images.
Add many more header images, article content in <ul> (and for good
measure in <ol>) and quotes with their content and not only their
author.

Extreme example:
https://www.zeit.de/campus/2024-05/protest-palaestina-universitaet-europa-uebersicht
2024-05-18 16:35:24 +02:00
Mynacol
a7ed3d56f9 [ZeitBridge] Prettify author field
By removing HTML tags (plaintext) and trimming it.
2024-05-18 16:35:24 +02:00
July
b785a4b64e ArsTechnicaBridge: restore categories lost by FeedExpander (#4030) 2024-05-17 21:29:17 +02:00
July
6e2aeda61d [GameBananaBridge] Include update contents in feed (#4103)
* [GameBananaBridge] Include update contents in feed

* [GameBananaBridge] Fix dynamic title property
2024-05-12 21:46:07 +02:00
July
4949900863 [ScribbleHubBridge] Handle 429 errors and use consistent GUID (#4104) 2024-05-12 21:45:14 +02:00
Alex Balgavy
776ee233bd [NOSBridge] fix bridge (#4102)
CSS selectors were no longer valid.
2024-05-12 20:30:23 +02:00
Facundo Tuesca
1c3024fca7 [MangaReaderBridge] Change feed title to manga name (#4092) 2024-05-08 00:25:45 +02:00
Patrick
d11b7f7754 Change URI for St. Johannes Blick (#4099)
Co-authored-by: Patrick <jummo@mailbox.org>
2024-05-05 23:30:38 +02:00
Eugene Molotov
f480209825 [YoutubeBridge] Fix empty result in search feed (#4098) 2024-05-05 23:30:23 +02:00
Thomas
d15960f955 [YouTubeCommunityTabBridge] Multi-image attachment support (#4091)
Adds support for multi-image attachments.
Also changes individual if-statements in "getAttachments" to if/elseif
as each post can apparently only have one attachment anyway.
2024-05-02 19:45:04 +02:00
Korytov Pavel
f3ca567159 [TldrTechBridge] Fix and improve bridge (#4090) 2024-04-27 10:35:59 +02:00
Thomas
d31f20758c [YouTubeCommunityTabBridge] Improve building of content & title (#4089)
* [YouTubeCommunityTabBridge] Improve building of content & title

Fixes truncated link hrefs in content and adds some general
improvements regarding the building of item content and item title

* [YouTubeCommunityTabBridge] Fix PHP deprecation warnings

Fixes the following deprecation warnings:

substr(): Passing null to parameter #1 ($string) of type string is
deprecated
2024-04-26 18:47:06 +02:00
Tone
154b8b9cdb Create TarnkappeBridge.php (#4085)
* Create TarnkappeBridge.php

* Update TarnkappeBridge.php
2024-04-19 19:08:58 +02:00
Mynacol
1f71d76ac1 [HeiseBridge] Remove additional ad banners
For example
https://www.heise.de/meinung/Kommentar-Microsofts-Sicherheitspraxis-wird-zur-Gefahr-und-das-BSI-schweigt-9686629.html
has two inline banners for a heise offering, not directly related to the
article. Removing all "inline" figures, which seems to catch all inline
unwanted elements, while avoiding removing useful figures/images.
2024-04-18 13:39:37 +02:00
sysadminstory
8c3e973b9f [PepperBridgeAbstract] Fix the "no result" detection (#4082)
The "no result" test did not work, it is fixed now.
2024-04-18 01:43:53 +02:00
llamasblade
97f5dafbc5 [HytaleBridge] Fix bridge not pulling all blog posts (#4079) 2024-04-16 17:58:05 +02:00
llamasblade
957a820931 [YandexZenBridge] Fix broken bridge for some channels (#4078)
Fixes #4071.

Major changes:
- the bridge's URI changed from zen.yandex.com to dzen.ru, as the former
  redirects to the latter (perhaps the bridge's name should be changed
  as well);
- the channel's URL is now required instead of the channel's username;
- two kinds of URLs are supported, one for channels with usernames and
  one for channels with IDs in their URL;
- the channel's real name, as shown in the webpage, is now used as the
  feed title.
2024-04-14 19:14:52 +02:00
Miika Launiainen
b4d397ff70 [YorushikaBridge] Fix getting date (#4077)
* Remove unnecessary variable

* Fix getting date
2024-04-14 19:13:31 +02:00
Arya K
89013faf7d Add Project Segfault Instance (#4076) 2024-04-13 15:59:25 +02:00
Korytov Pavel
428c6c3c66 [ScientificAmericanBridge] Update bridge (#4074)
* [ScientificAmericanBridge] Update bridge

* [ScientificAmericanBridge] Fix lint
2024-04-12 01:57:55 +02:00
Miika Launiainen
58c254ad3b [YorushikaBridge] Add language selection parameter (#4073)
* Add language selection parameter

* Fix typo

* Fix lint errors
2024-04-11 17:18:37 +02:00
Dag
a73b66f4d6 fix(ScientificAmericanBridge) (#4070) 2024-04-10 18:32:48 +02:00
sysadminstory
815dc180cc [PicukiBridge] Fix image URL (#4068)
Image URL does not need to be faked anymore, as the content/type is now valid.
2024-04-10 17:30:56 +02:00
July
7d6881732d [ScribbleHubBridge] Add list page feed creation (#4012)
* [ScribbleHubBridge] Add list page feed creation

* [ScribbleHubBridge] Add list title handling

* [ScribbleHubBridge] Don't include timestamp in List GUIDs

* [ScribbleHubBridge] Fix usage of dynamic property
2024-04-07 23:02:36 +02:00
Dag
4602f4f475 tweaks (#4065) 2024-04-06 18:07:45 +02:00
Mynacol
b3ac1d176c [FDroidRepoBridge] Simplify json retrieval (#4063)
* [FDroidRepoBridge] Simplify json retrieval

I looked into avoiding the writing-to-file and then reading-from-file altogether. Using a special file path that leaves the data in memory probably wouldn't work. But I'm unsure why we use the `index-v1.jar` file altogether.
The main F-Droid repo [lists](https://f-droid.org/en/docs/All_our_APIs/#the-repo-index) not only `index-v1.jar` (which only makes sense if we were to use the contained signature, which we don't), but also `index-v1.json` and `index-v2.json`. These json files can be fetched with `getContents`, optionally cached, and directly fed into `Json::decode` without using a temporary file. The HTTP transfer encoding can compress the file to a similar degree the jar (=zip) can. That's exactly what this commit uses.

Now the question is whether all the F-Droid repositories out there have this file. I went through the whole [list of known repositories](https://forum.f-droid.org/t/known-repositories/721) and only one repo misses the `index-v1.json` file: [Bromite](https://fdroid.bromite.org/fdroid/repo/index-v1.json). Under these circumstances we can depend on the availability of the `index-v1.json` file.

Closes #4062

* [FDroidRepoBridge] Cleanup not requiring Zip

With the last commit 1152386678, the zip
extension is not required anymore. Don't fail if it's not available.
2024-04-05 17:39:38 +02:00
Mynacol
d5aa3aef69 [FDroidRepoBridge] Fix example repo
The ttrss example/placeholder repo is offline, which fails CI jobs.
Replace it with a healthy repo and package to get working CI tests and comparisons.
2024-04-05 11:39:43 +02:00
sysadminstory
3ff2ef94e0 Fix docs : Replace relative links to files with full URL (#4059) 2024-04-04 19:28:56 +02:00
Dag
001dd47439 fix: small tweaks (#4057) 2024-04-04 19:12:04 +02:00
Dag
3cba984d22 fix(FDroidRepoBridge): unlink when json file is absent from archive (#4056) 2024-04-04 17:43:07 +02:00
sysadminstory
82606a479a [PepperBridgeAbstract,DealabsBridge,HotUKDealsBridge,MydealsBridge] Fix search URL, No results handling fixed, Thread title and Message URL handling (#4053)
* [PepperBridgeAbstract,DealabsBridge,HotUKDealsBridge,MydealsBridge] Fix search URL, No results handling fixed, Thread title and Message URL handling

Search URL has been updated according to the website.

If a search doesn't return any results, the HTML won't contain any
specific text now : the HTML structure is slightly different, so the
bridge has been updated.

The unneeded 'no-results' text is now removed from the specific bridges.

The board thread title has been removed from the content, so now we use
the page <title> element.

In case a board message is empty, there was an exception during the
filtering of message without URL.

* [PepperBridgeAbstract,DealabsBridge,HotUKDealsBridge,MydealsBridge] Fix search URL, No results handling fixed, Thread title and Message URL handling

Coding policy fixes
2024-04-04 04:08:29 +02:00
User123698745
94292af51b [prtester.py] fix url parameter encoding (#4052)
this will (at least) fix the pr preview of:
bridges/AnisearchBridge.php
bridges/BakaUpdatesMangaReleasesBridge.php
bridges/DesoutterBridge.php
bridges/IndiegogoBridge.php
2024-04-04 04:07:16 +02:00
Tone
f736da6fae [GolemBridge] fix for internal videos (#4051)
* [GolemBridge] fix for internal videos

with this internal golem-videos can be played directly from feed

* Update GolemBridge.php
2024-04-03 16:23:52 +02:00
Niehztog
fb66775ece [XPathAbstract] Refactor xpath abstract (#4047)
* refactor XPathAbstract, keep all functionality intact

* fix linter errors

* further simplify code

* set default value for raw item content to true, avoiding escaping of html tags in feed item contents by default
2024-04-02 23:14:25 +02:00
Dawid Wróbel
8f962383c2 [eBayBridge] fix Belgian eBay URL handling (#4050)
Fixes #3918
2024-04-02 01:01:23 +02:00
Dawid Wróbel
bb979e9e08 [AllegroBridge] fix logical condition on parameters (#4049) 2024-04-02 00:06:15 +02:00
Dawid Wróbel
a12bab9eed [AllegroBridge] ask for a complete cookie string, mere wcdx works no more (#4048) 2024-04-01 23:44:45 +02:00
Miika Launiainen
b4659786cb [GenshinImpactBridge] Small fixes (#4046)
* Switch json_decode to Json::decode

* Change regex delimiter from / to #

* Save item enclosures as list
2024-04-01 21:16:32 +02:00
July
7001fbaf49 [AO3Bridge] Fix bad heading selector (#4045) 2024-03-31 22:41:58 +02:00
Dag
d5d470cbc2 fix(dribble) (#4044) 2024-03-31 22:10:59 +02:00
Dag
182567e434 fix(bridges/DavesTrailerPageBridge): remove (#4043) 2024-03-31 21:52:53 +02:00
Dag
9682f74fc5 fix(cnet): author typo (#4042) 2024-03-31 21:37:51 +02:00
Dag
17a3b4c9d8 Fix 198 (#4041)
* fix(twitch): log instead of exception

* typo
2024-03-31 21:32:27 +02:00
Dag
73289324bd feat: add vendor http header to cached responses (#4040) 2024-03-31 21:02:55 +02:00
Dag
8ca1b90840 fix(NationalGeographicBridge) (#4039) 2024-03-31 20:07:14 +02:00
Niehztog
1c3c85d8ff [XPathBridge] Allow multiple categories (#4038)
* [XPathAbstract] allow multiple categories

* fix feed icons in two bridges

* fix warning

* fix linter errors
2024-03-31 18:46:07 +02:00
Miika Launiainen
d23fd2522c [GenshinImpactBridge] Fix bridge to use new API (#4011)
* [GenshinImpactBridge] Fix bridge to use new API

* Add category parameters back to not break existing feeds

* Fix lint error

* Remove whitespace
2024-03-31 03:46:23 +02:00
sysadminstory
b58d8b099b docs: Complete helper function documentation (#3911)
* docs: Complete helper function documentation

Complete documentation of the Helper functions

* docs: remove parameters and add a link to source

- Parameters removed
- Link to the file defining the function

* docs: fix links

Fix links to source files
2024-03-31 03:44:10 +02:00
Dag
545dc969d3 refactor (#4037) 2024-03-31 03:38:42 +02:00
Quentin de Longraye
24e429969f specify system section for enabling bridges (#4036) 2024-03-30 16:11:57 +01:00
Tone
e0be366258 Update AnisearchBridge.php (#4025)
* Update AnisearchBridge.php

added youtube trailer

* made trailers optional and reduced scraping to 5 articles if selected

* Update AnisearchBridge.php
2024-03-29 15:37:43 +01:00
sysadminstory
be445759b6 [PepperBridgeAbstract,DealabsBridge,HotUKDealsBridge,MydealsBridge] Move as much as possible to JSON (#4032)
As the website uses more and more JSON, and JSON is a machine-readable
format, I migrated as much as possible to the JSON.

This simplifies the Abstract class a lot, and the Bridge classes need
fewer language-specific strings.
2024-03-28 19:44:27 +01:00
July
db984d8a8b AO3Bridge: move tags to categories and remove duplicate fic summary (#4031)
* AO3Bridge: move tags to categories and remove duplicate fic summary

* [AO3Bridge] Fix tag html entity encoding
2024-03-28 19:43:17 +01:00
Tone
e251e358ff [HeiseBridge] fix for embedded youtube-videos (#4034)
* [HeiseBridge] fix for embedded youtube-videos

with this the embedded youtube videos will work in the feed

* Update HeiseBridge.php

* Update HeiseBridge.php
2024-03-28 19:42:41 +01:00
Tone
0c2099a852 [GolemBridge] fixed embedded youtube videos (#4033)
* [GolemBridge] fixed embedded youtube videos

embedded youtube-videos can be played directly from feed now

* Update GolemBridge.php

* Update GolemBridge.php

* Update GolemBridge.php

* Update GolemBridge.php
2024-03-28 19:41:56 +01:00
Tone
fee5e269d0 Update CaschyBridge.php (#4027)
without removing the video-container-div the embedded youtube videos work again
2024-03-24 16:38:51 +01:00
Tone
2aace6c898 Added Bridge for Anisearch.de (#4023)
* Create AnisearchBridge.php

* Update AnisearchBridge.php

* Update AnisearchBridge.php
2024-03-22 21:01:16 +01:00
sysadminstory
3ed193eee2 [IdealoBridge] Update Bridge Meta data & (#4022)
The bridge metadata has been updated to reflect that the bridge works
for other international versions of Idealo.

The Price trend is displayed on every price in the Feed element
content. The same function is now used to show the price trend in the
Feed element title, to remove some duplicate code.
2024-03-22 09:44:42 +01:00
Patrick
58e2b56d40 Adjustment to new website layout (#4020) 2024-03-17 19:03:09 +01:00
Tone
a61524bf77 Update RedditBridge.php (#4019)
prevent error htmlspecialchars_decode(): Passing null to parameter #1
2024-03-17 19:02:51 +01:00
Tim-Florian Feulner
36147a082d Fix NACSouthGermanyMediaLibraryBridge for new website layout (#4014) 2024-03-15 19:20:04 +01:00
sysadminstory
e6cb5fdc89 [IdealoBridge] Fix Feed items & Feed title customisation (#4013)
- Feed items with new price tracking had "Max Price Used" instead of
  "Max Price New"
- Feed Title is now customised with the product name and the Price
  limits
- Fixed logic for saving prices in cache
- remove undefined variable notices
2024-03-13 23:47:46 +01:00
Dag
4bad1c140a fix(reddit): url encoding (#4010) 2024-03-12 23:59:10 +01:00
Dag
5b80af978f docs: improve README (#4009) 2024-03-12 19:46:21 +01:00
tillcash
ecf61f6fa7 [DailythanthiBridge] New Bridge (#4006) 2024-03-11 20:14:10 +01:00
Mynacol
254efc2812 [ZeitBridge] Remove doubled text
The first two paragraphs were repeated at the end of articles. The first
CSS selector filters those out (example 1).
The second CSS selector removes a "Zum Anschauen benötigen wir Ihre Zustimmung"
line from a poll widget. We can't load the widget successfully,
therefore we should remove all embeds that seem to use javascript
(example 2).

1: https://www.zeit.de/campus/2024-03/bundesregierung-wissenschaft-arbeitsvertrag-regeln
2: https://www.zeit.de/campus/2024-03/ausbildung-abgebrochen-gruende-azubi-aufruf
2024-03-10 22:27:32 +01:00
Jonathan Kay
84b93e0f8f [ComicsKingdomBridge] Fix/Rewrite of ComicsKingdom Bridge (#4003)
* Rewrite ComicsKingdom Bridge

Rewrite of bridge as the existing one no longer works:
- Now uses REST API
- Added optional limit to get desired number of comics
- Author now reflects the comic creators name
- Feed name and comic titles now pulled from site
- Added myself as the maintainer as I've been the one maintaining, and the existing code no longer is used

* Change API to URI to pass test

* Remove whitespace, add curly braces and switch to single quotes
2024-03-10 15:18:50 +01:00
tillcash
79699131e8 [MaalaimalarBridge] New Bridge (#4001) 2024-03-08 12:46:32 +01:00
July
f7c1b71939 NyaaTorrentsBridge: add torrent to enclosures and generate better feed name (#3996)
* NyaaTorrentsBridge: add torrent to enclosures and generate better feed name

* NyaaTorrentsBridge: fix accidental () in bridge name
2024-03-06 19:40:59 +01:00
July
7a7f8d5050 AnnasArchiveBridge: correctly handling partial matches and file links (#3997) 2024-03-06 01:28:24 +01:00
D5k H3h
683c968d64 [Rooster Teeth] Add Camp Camp channel (#3992) 2024-03-01 20:24:14 +01:00
Dag
4c355ba308 fix(FilterBridge): trim title so that regex filter works as expected (#3989)
The fix is in FeedParser, so this fixes all usages
of FeedParser where title is now trimmed.

fix #3985
2024-02-20 19:32:31 +01:00
xduugu
35f6e62e45 docker: Use pre-built curl-impersonate library from github releases (#3984)
The docker image is only available for `amd64` architecture and therefore
cannot be used for arm images.

Fixes #3983
2024-02-20 08:03:04 +01:00
hleskien
932f20d434 fixed date with time in LuftfahrtBundesAmtBridge (#3987) 2024-02-18 19:19:33 +01:00
Korytov Pavel
e65155f440 [OpenCVEBridge] Add bridge (#3978)
* [OpenCVEBridge] Add bridge

* [OpenCVEBridge] Fix tests

* [OpenCVEBridge] Fix description of the filter parameter
2024-02-16 22:24:13 +01:00
July
7813f4564e AO3Bridge: add options to fetch chapter contents and list titles (#3981)
* AO3Bridge: add options to fetch chapter contents and titles for list feeds

and add downloads for each fic to enclosures

* AO3Bridge: fix list default value

* AO3Bridge: fix erroneous dynamic property usage

* AO3Bridge: fix unit test failure for getURI
2024-02-16 04:14:17 +01:00
sysadminstory
4d15ffd2cf [PepperBridgeAbstract,DealabsBridge,HotUKDealsBridge,MydealsBridge] (#3982)
Exclude thread results

Some categories showed some thread in the middle of the deals : now only
the deals are handled

Updated the "no results" text to follow the sites changes
2024-02-16 03:58:15 +01:00
Dag
598ee5b51e fix(pinterest): set enclosure so it emits mrss media:content prop (#3980) 2024-02-14 16:02:54 +01:00
Eugene Molotov
257799be8e [Vk2Bridge] Alternative bridge for VK (#3878) 2024-02-10 15:59:39 +01:00
hleskien
8e8028b786 Adopt WebDriverAbstract as a solution for active (JavaScript) websites (#3971)
* first working version

---------

Co-authored-by: Dag <me@dvikan.no>
2024-02-10 04:42:22 +01:00
Dag
ff7840d60f chore: prepare for introduction of php-webdriver/webdriver (Selenium) (#3975) 2024-02-09 22:51:10 +01:00
Dag
df7b91a2a3 chore: upgrade composer root deps (#3974)
composer update --root-reqs
Loading composer repositories with package information
Updating dependencies
Lock file operations: 0 installs, 2 updates, 0 removals
  - Upgrading phpunit/phpunit (9.6.9 => 9.6.11)
  - Upgrading squizlabs/php_codesniffer (3.7.2 => 3.8.1)
Writing lock file
Installing dependencies from lock file (including require-dev)
Package operations: 0 installs, 2 updates, 0 removals
  - Upgrading phpunit/phpunit (9.6.9 => 9.6.11): Extracting archive
  - Upgrading squizlabs/php_codesniffer (3.7.2 => 3.8.1): Extracting archive
Generating autoload files
26 packages you are using are looking for funding.
Use the `composer fund` command to find out more!
No security vulnerability advisories found.
2024-02-09 22:39:45 +01:00
Dag
7b2ac36264 chore: move committed third-party deps to lib (#3973) 2024-02-09 22:27:35 +01:00
tillcash
46ac77590e [KilledbyMicrosoftBridge] Update: Adjusted content format for consistency (#3968) 2024-02-09 09:39:03 +01:00
Dag
6f731b20a9 fix(DarkReading): official rss endpoint changed (#3967) 2024-02-09 08:03:04 +01:00
Dag
8a6798a227 fix: escape token for html context (#3966) 2024-02-09 07:27:16 +01:00
Tone
ae2eb2f1d1 feat(Reddit): add parameter for web UI frontend 2024-02-08 20:05:24 +01:00
Korytov Pavel
cfef482366 [EconomistBridge] Handle 404s in feed gracefully (#3965) 2024-02-08 15:36:03 +01:00
Tone
75a0a779c0 Update HeiseBridge.php (#3963)
fix for broken article categories
2024-02-08 15:35:24 +01:00
tillcash
6bb04d48ed [KilledbyMicrosoftBridge] New Bridge (#3961) 2024-02-07 19:33:25 +01:00
Dag
6878eb26aa fix: changed dom (#3958) 2024-02-06 19:32:05 +01:00
sysadminstory
64f95b4990 [PepperBridgeAbstract,DealabsBridge,HotUKDealsBridge,MydealsBridge] Fix missing price, discount and ships from information (#3956)
- DealabsBridge
- HotUKDealsBridge
- MydealsBridge
Add the currency in the i18n data of the bridges

- PepperBridgeAbstract
The Price, discount data and Ships from information are not in the HTML
content anymore, so switched to the js-vue2 attributes
2024-02-06 02:23:12 +01:00
Scott Colby
66a6847fd0 Two fixes to DeutscheWelle (#3954)
* [DeutscheWelleBridge] Small URL fix.

Reset the $item's uri value after removing the tracking query string.

* [DeutscheWelleBridge] Fix "hero" images.

The main "hero" image for each article has src="" and relies on the
srcset attribute for the browser to pick the best image based on the
actual displayed size.

The call to `defaultLinkTo()` replaces the empty src with the article's
link, which, not being an image, breaks the image.

This change resets the src's of any such images back to "".
2024-02-06 02:21:30 +01:00
sysadminstory
7931f37a83 [PepperBridgeAbstract] Fix deal image scraping (#3953)
Deal Image was moved to a vuejs element, the deal image scraping was
fixed.
2024-02-05 23:30:18 +01:00
Tostiman
d175bab58e Fix car throttle bridge (#3925) 2024-02-04 18:28:12 +01:00
Dag
7c89712837 ci: fix broken docs build (#3948) 2024-02-03 13:56:56 +01:00
ljf (zamentur)
a14508d79b Add sans-nuage instance (#3947) 2024-02-03 12:58:36 +01:00
271 changed files with 9469 additions and 9958 deletions

2
.gitattributes vendored
View File

@@ -47,8 +47,6 @@ phpcs.xml export-ignore
phpcompatibility.xml export-ignore
tests/ export-ignore
cache/.gitkeep export-ignore
bridges/DemoBridge.php export-ignore
bridges/FeedExpanderExampleBridge.php export-ignore
## Composer
#

1
.github/.gitignore vendored
View File

@@ -4,3 +4,4 @@
# Generated files
comment*.md
comment*.txt
*.html

86
.github/prtester.py vendored
View File

@@ -4,7 +4,9 @@ import re
from bs4 import BeautifulSoup
from datetime import datetime
from typing import Iterable
import os.path
import os
import glob
import urllib
# This script is specifically written to be used in automation for https://github.com/RSS-Bridge/rss-bridge
#
@@ -13,18 +15,33 @@ import os.path
# It also add a <base> tag with the url of em's public instance, so viewing
# the HTML file locally will actually work as designed.
ARTIFACT_FILE_EXTENSION = '.html'
class Instance:
name = ''
url = ''
def main(instances: Iterable[Instance], with_upload: bool, with_reduced_upload: bool, title: str, output_file: str):
start_date = datetime.now()
prid = os.getenv('PR')
artifact_base_url = f'https://rss-bridge.github.io/rss-bridge-tests/prs/{prid}'
artifact_directory = os.getcwd()
for file in glob.glob(f'*{ARTIFACT_FILE_EXTENSION}', root_dir=artifact_directory):
os.remove(file)
table_rows = []
for instance in instances:
page = requests.get(instance.url) # Use python requests to grab the rss-bridge main page
soup = BeautifulSoup(page.content, "html.parser") # use bs4 to turn the page into soup
bridge_cards = soup.select('.bridge-card') # get a soup-formatted list of all bridges on the rss-bridge page
table_rows += testBridges(instance, bridge_cards, with_upload, with_reduced_upload) # run the main scraping code with the list of bridges
table_rows += testBridges(
instance=instance,
bridge_cards=bridge_cards,
with_upload=with_upload,
with_reduced_upload=with_reduced_upload,
artifact_directory=artifact_directory,
artifact_base_url=artifact_base_url) # run the main scraping code with the list of bridges
with open(file=output_file, mode='w+', encoding='utf-8') as file:
table_rows_value = '\n'.join(sorted(table_rows))
file.write(f'''
@@ -36,7 +53,7 @@ def main(instances: Iterable[Instance], with_upload: bool, with_reduced_upload:
*last change: {start_date.strftime("%A %Y-%m-%d %H:%M:%S")}*
'''.strip())
def testBridges(instance: Instance, bridge_cards: Iterable, with_upload: bool, with_reduced_upload: bool) -> Iterable:
def testBridges(instance: Instance, bridge_cards: Iterable, with_upload: bool, with_reduced_upload: bool, artifact_directory: str, artifact_base_url: str) -> Iterable:
instance_suffix = ''
if instance.name:
instance_suffix = f' ({instance.name})'
@@ -45,15 +62,14 @@ def testBridges(instance: Instance, bridge_cards: Iterable, with_upload: bool, w
bridgeid = bridge_card.get('id')
bridgeid = bridgeid.split('-')[1] # this extracts a readable bridge name from the bridge metadata
print(f'{bridgeid}{instance_suffix}')
bridgestring = '/?action=display&bridge=' + bridgeid + '&format=Html'
bridge_name = bridgeid.replace('Bridge', '')
context_forms = bridge_card.find_all("form")
form_number = 1
for context_form in context_forms:
# a bridge can have multiple contexts, named 'forms' in html
# this code will produce a fully working formstring that should create a working feed when called
# this code will produce a fully working url that should create a working feed when called
# this will create an example feed for every single context, to test them all
formstring = ''
context_parameters = {}
error_messages = []
context_name = '*untitled*'
context_name_element = context_form.find_previous_sibling('h5')
@@ -62,27 +78,27 @@ def testBridges(instance: Instance, bridge_cards: Iterable, with_upload: bool, w
parameters = context_form.find_all("input")
lists = context_form.find_all("select")
# this for/if mess cycles through all available input parameters, checks if it required, then pulls
# the default or examplevalue and then combines it all together into the formstring
# the default or examplevalue and then combines it all together into the url parameters
# if an example or default value is missing for a required attribute, it will throw an error
# any non-required fields are not tested!!!
for parameter in parameters:
if parameter.get('type') == 'hidden' and parameter.get('name') == 'context':
cleanvalue = parameter.get('value').replace(" ","+")
formstring = formstring + '&' + parameter.get('name') + '=' + cleanvalue
if parameter.get('type') == 'number' or parameter.get('type') == 'text':
parameter_type = parameter.get('type')
parameter_name = parameter.get('name')
if parameter_type == 'hidden':
context_parameters[parameter_name] = parameter.get('value')
if parameter_type == 'number' or parameter_type == 'text':
if parameter.has_attr('required'):
if parameter.get('placeholder') == '':
if parameter.get('value') == '':
name_value = parameter.get('name')
error_messages.append(f'Missing example or default value for parameter "{name_value}"')
error_messages.append(f'Missing example or default value for parameter "{parameter_name}"')
else:
formstring = formstring + '&' + parameter.get('name') + '=' + parameter.get('value')
context_parameters[parameter_name] = parameter.get('value')
else:
formstring = formstring + '&' + parameter.get('name') + '=' + parameter.get('placeholder')
# same thing, just for checkboxes. If a checkbox is checked per default, it gets added to the formstring
if parameter.get('type') == 'checkbox':
context_parameters[parameter_name] = parameter.get('placeholder')
# same thing, just for checkboxes. If a checkbox is checked per default, it gets added to the url parameters
if parameter_type == 'checkbox':
if parameter.has_attr('checked'):
formstring = formstring + '&' + parameter.get('name') + '=on'
context_parameters[parameter_name] = 'on'
for listing in lists:
selectionvalue = ''
listname = listing.get('name')
@@ -102,15 +118,21 @@ def testBridges(instance: Instance, bridge_cards: Iterable, with_upload: bool, w
if 'selected' in selectionentry.attrs:
selectionvalue = selectionentry.get('value')
break
formstring = formstring + '&' + listname + '=' + selectionvalue
termpad_url = 'about:blank'
context_parameters[listname] = selectionvalue
artifact_url = 'about:blank'
if error_messages:
status = '<br>'.join(map(lambda m: f'❌ `{m}`', error_messages))
else:
# if all example/default values are present, form the full request string, run the request, add a <base> tag with
# if all example/default values are present, form the full request url, run the request, add a <base> tag with
# the url of em's public instance to the response text (so that relative paths work, e.g. to the static css file) and
# then upload it to termpad.com, a pastebin-like-site.
response = requests.get(instance.url + bridgestring + formstring)
# then save it to a html file.
context_parameters.update({
'action': 'display',
'bridge': bridgeid,
'format': 'Html',
})
request_url = f'{instance.url}/?{urllib.parse.urlencode(context_parameters)}'
response = requests.get(request_url)
page_text = response.text.replace('<head>','<head><base href="https://rss-bridge.org/bridge01/" target="_blank">')
page_text = page_text.encode("utf_8")
soup = BeautifulSoup(page_text, "html.parser")
@@ -134,16 +156,18 @@ def testBridges(instance: Instance, bridge_cards: Iterable, with_upload: bool, w
if status_is_ok:
status = '✔️'
if with_upload and (not with_reduced_upload or not status_is_ok):
termpad = requests.post(url="https://termpad.com/", data=page_text)
termpad_url = termpad.text.strip()
termpad_url = termpad_url.replace('termpad.com/','termpad.com/raw/')
table_rows.append(f'| {bridge_name} | [{form_number} {context_name}{instance_suffix}]({termpad_url}) | {status} |')
filename = f'{bridge_name} {form_number}{instance_suffix}{ARTIFACT_FILE_EXTENSION}'
filename = re.sub(r'[^a-z0-9 \_\-\.]', '', filename, flags=re.I).replace(' ', '_')
with open(file=f'{artifact_directory}/{filename}', mode='wb') as file:
file.write(page_text)
artifact_url = f'{artifact_base_url}/{filename}'
table_rows.append(f'| {bridge_name} | [{form_number} {context_name}{instance_suffix}]({artifact_url}) | {status} |')
form_number += 1
return table_rows
def getFirstLine(value: str) -> str:
# trim whitespace and remove text that can break the table or is simply unnecessary
clean_value = re.sub('^\[[^\]]+\]\s*rssbridge\.|[\|`]', '', value.strip())
clean_value = re.sub(r'^\[[^\]]+\]\s*rssbridge\.|[\|`]', '', value.strip())
first_line = next(iter(clean_value.splitlines()), '')
max_length = 250
if (len(first_line) > max_length):
@@ -163,8 +187,8 @@ if __name__ == '__main__':
for instance_arg in args.instances:
instance_arg_parts = instance_arg.split('::')
instance = Instance()
instance.name = instance_arg_parts[1] if len(instance_arg_parts) >= 2 else ''
instance.url = instance_arg_parts[0]
instance.name = instance_arg_parts[1].strip() if len(instance_arg_parts) >= 2 else ''
instance.url = instance_arg_parts[0].strip().rstrip("/")
instances.append(instance)
else:
instance = Instance()
@@ -181,4 +205,4 @@ if __name__ == '__main__':
with_reduced_upload=args.reduced_upload and not args.no_upload,
title=args.title,
output_file=args.output_file
);
);

View File

@@ -21,7 +21,7 @@ jobs:
-
name: Docker meta
id: docker_meta
uses: docker/metadata-action@v4
uses: docker/metadata-action@v5
with:
images: |
${{ env.DOCKERHUB_SLUG }}
@@ -33,26 +33,26 @@ jobs:
type=raw,value=stable,enable=${{ startsWith(github.ref, 'refs/tags/20') }}
-
name: Set up QEMU
uses: docker/setup-qemu-action@v2
uses: docker/setup-qemu-action@v3
-
name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2
uses: docker/setup-buildx-action@v3
-
name: Login to DockerHub
uses: docker/login-action@v2
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
-
name: Login to GitHub Container Registry
uses: docker/login-action@v2
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
-
name: Build and push
uses: docker/bake-action@v2
uses: docker/bake-action@v5
with:
files: |
./docker-bake.hcl

View File

@@ -9,7 +9,7 @@ jobs:
documentation:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
with:
persist-credentials: false
- name: Setup PHP

View File

@@ -13,7 +13,7 @@ jobs:
matrix:
php-versions: ['7.4']
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- uses: shivammathur/setup-php@v2
with:
php-version: ${{ matrix.php-versions }}
@@ -26,7 +26,7 @@ jobs:
matrix:
php-versions: ['7.4']
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- uses: shivammathur/setup-php@v2
with:
php-version: ${{ matrix.php-versions }}
@@ -38,7 +38,7 @@ jobs:
executable_php_files_check:
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- run: |
if find -name "*.php" -executable -type f -print -exec false {} +
then

View File

@@ -5,15 +5,30 @@ on:
branches: [ master ]
jobs:
check-bridges:
name: Check if bridges were changed
runs-on: ubuntu-latest
outputs:
BRIDGES: ${{ steps.check1.outputs.BRIDGES }}
steps:
- name: Check number of bridges
id: check1
run: |
PR=${{github.event.number}};
wget https://patch-diff.githubusercontent.com/raw/$GITHUB_REPOSITORY/pull/$PR.patch;
bridgeamount=$(cat $PR.patch | grep "\bbridges/[A-Za-z0-9]*Bridge\.php\b" | sed "s=.*\bbridges/\([A-Za-z0-9]*\)Bridge\.php\b.*=\1=g" | sort | uniq | wc -l);
echo "BRIDGES=$bridgeamount" >> "$GITHUB_OUTPUT"
test-pr:
name: Generate HTML
runs-on: ubuntu-latest
needs: check-bridges
if: needs.check-bridges.outputs.BRIDGES > 0
env:
PYTHONUNBUFFERED: 1
# Needs additional permissions https://github.com/actions/first-interaction/issues/10#issuecomment-1041402989
steps:
- name: Check out self
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
ref: ${{github.event.pull_request.head.ref}}
repository: ${{github.event.pull_request.head.repo.full_name}}
@@ -33,9 +48,9 @@ jobs:
docker build -t prbuild .;
docker run -d -v $GITHUB_WORKSPACE/whitelist.txt:/app/whitelist.txt -v $GITHUB_WORKSPACE/DEBUG:/app/DEBUG -p 3001:80 prbuild
- name: Setup python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: '3.7'
python-version: '3.13'
cache: 'pip'
- name: Install requirements
run: |
@@ -51,9 +66,17 @@ jobs:
body="${body//$'\n'/'%0A'}";
body="${body//$'\r'/'%0D'}";
echo "bodylength=${#body}" >> $GITHUB_OUTPUT
env:
PR: ${{ github.event.number }}
- name: Upload generated tests
uses: actions/upload-artifact@v4
id: upload-generated-tests
with:
name: tests
path: '*.html'
- name: Find Comment
if: ${{ steps.testrun.outputs.bodylength > 130 }}
uses: peter-evans/find-comment@v2
uses: peter-evans/find-comment@v3
id: fc
with:
issue-number: ${{ github.event.pull_request.number }}
@@ -61,9 +84,43 @@ jobs:
body-includes: Pull request artifacts
- name: Create or update comment
if: ${{ steps.testrun.outputs.bodylength > 130 }}
uses: peter-evans/create-or-update-comment@v2
uses: peter-evans/create-or-update-comment@v4
with:
comment-id: ${{ steps.fc.outputs.comment-id }}
issue-number: ${{ github.event.pull_request.number }}
body-file: comment.txt
edit-mode: replace
upload_tests:
name: Upload tests
runs-on: ubuntu-latest
needs: test-pr
steps:
- uses: actions/checkout@v4
with:
repository: 'RSS-Bridge/rss-bridge-tests'
ref: 'main'
token: ${{ secrets.RSSTESTER_ACTION }}
- name: Setup git config
run: |
git config --global user.name "GitHub Actions"
git config --global user.email "<>"
- name: Download tests
uses: actions/download-artifact@v4
with:
name: tests
- name: Move tests
run: |
cd prs
mkdir -p ${{github.event.number}}
cd ${{github.event.number}}
mv -f $GITHUB_WORKSPACE/*.html .
- name: Commit and push generated tests
run: |
export COMMIT_MESSAGE="Added tests for PR ${{github.event.number}}"
git add .
git commit -m "$COMMIT_MESSAGE"
git push

View File

@@ -13,9 +13,11 @@ jobs:
matrix:
php-versions: ['7.4', '8.0', '8.1']
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- uses: shivammathur/setup-php@v2
with:
php-version: ${{ matrix.php-versions }}
env:
update: true
- run: composer install
- run: composer test

View File

@@ -144,6 +144,7 @@
* [Niehztog](https://github.com/Niehztog)
* [NikNikYkt](https://github.com/NikNikYkt)
* [Nono-m0le](https://github.com/Nono-m0le)
* [NotsoanoNimus](https://github.com/NotsoanoNimus)
* [obsiwitch](https://github.com/obsiwitch)
* [Ololbu](https://github.com/Ololbu)
* [ORelio](https://github.com/ORelio)

View File

@@ -1,5 +1,3 @@
FROM lwthiker/curl-impersonate:0.5-ff-slim-buster AS curlimpersonate
FROM debian:12-slim AS rssbridge
LABEL description="RSS-Bridge is a PHP project capable of generating RSS and Atom feeds for websites that don't have one."
@@ -7,7 +5,8 @@ LABEL repository="https://github.com/RSS-Bridge/rss-bridge"
LABEL website="https://github.com/RSS-Bridge/rss-bridge"
ARG DEBIAN_FRONTEND=noninteractive
RUN apt-get update && \
RUN set -xe && \
apt-get update && \
apt-get install --yes --no-install-recommends \
ca-certificates \
nginx \
@@ -24,18 +23,44 @@ RUN apt-get update && \
php-xml \
php-zip \
# php-zlib is enabled by default with PHP 8.2 in Debian 12
# for downloading libcurl-impersonate
curl \
&& \
# install curl-impersonate library
curlimpersonate_version=0.6.0 && \
{ \
{ \
[ $(arch) = 'aarch64' ] && \
archive="libcurl-impersonate-v${curlimpersonate_version}.aarch64-linux-gnu.tar.gz" && \
sha512sum="d04b1eabe71f3af06aa1ce99b39a49c5e1d33b636acedcd9fad163bc58156af5c3eb3f75aa706f335515791f7b9c7a6c40ffdfa47430796483ecef929abd905d" \
; } \
|| { \
[ $(arch) = 'armv7l' ] && \
archive="libcurl-impersonate-v${curlimpersonate_version}.arm-linux-gnueabihf.tar.gz" && \
sha512sum="05906b4efa1a6ed8f3b716fd83d476b6eea6bfc68e3dbc5212d65a2962dcaa7bd1f938c9096a7535252b11d1d08fb93adccc633585ff8cb8cec5e58bfe969bc9" \
; } \
|| { \
[ $(arch) = 'x86_64' ] && \
archive="libcurl-impersonate-v${curlimpersonate_version}.x86_64-linux-gnu.tar.gz" && \
sha512sum="480bbe9452cd9aff2c0daaaf91f1057b3a96385f79011628a9237223757a9b0d090c59cb5982dc54ea0d07191657299ea91ca170a25ced3d7d410fcdff130ace" \
; } \
} && \
curl -LO "https://github.com/lwthiker/curl-impersonate/releases/download/v${curlimpersonate_version}/${archive}" && \
echo "$sha512sum $archive" | sha512sum -c - && \
mkdir -p /usr/local/lib/curl-impersonate && \
tar xaf "$archive" -C /usr/local/lib/curl-impersonate --wildcards 'libcurl-impersonate-ff.so*' && \
rm "$archive" && \
apt-get purge --assume-yes curl && \
rm -rf /var/lib/apt/lists/*
ENV LD_PRELOAD /usr/local/lib/curl-impersonate/libcurl-impersonate-ff.so
ENV CURL_IMPERSONATE ff91esr
# logs should go to stdout / stderr
RUN ln -sfT /dev/stderr /var/log/nginx/error.log; \
ln -sfT /dev/stdout /var/log/nginx/access.log; \
chown -R --no-dereference www-data:adm /var/log/nginx/
COPY --from=curlimpersonate /usr/local/lib/libcurl-impersonate-ff.so /usr/local/lib/curl-impersonate/
ENV LD_PRELOAD /usr/local/lib/curl-impersonate/libcurl-impersonate-ff.so
ENV CURL_IMPERSONATE ff91esr
COPY ./config/nginx.conf /etc/nginx/sites-available/default
COPY ./config/php-fpm.conf /etc/php/8.2/fpm/pool.d/rss-bridge.conf
COPY ./config/php.ini /etc/php/8.2/fpm/conf.d/90-rss-bridge.ini

View File

@@ -53,9 +53,10 @@ Requires minimum PHP 7.4.
### How to install on traditional shared web hosting
RSS-Bridge can basically be unzipped in a web folder. Should be working instantly.
RSS-Bridge can basically be unzipped into a web folder. Should be working instantly.
Latest zip as of Sep 2023: https://github.com/RSS-Bridge/rss-bridge/archive/refs/tags/2023-09-24.zip
Latest zip:
https://github.com/RSS-Bridge/rss-bridge/archive/refs/heads/master.zip (2MB)
### How to install on Debian 12 (nginx + php-fpm)
@@ -64,9 +65,9 @@ These instructions have been tested on a fresh Debian 12 VM from Digital Ocean (
```shell
timedatectl set-timezone Europe/Oslo
apt install git nginx php8.2-fpm php-mbstring php-simplexml php-curl
apt install git nginx php8.2-fpm php-mbstring php-simplexml php-curl php-intl
# Create a new user account
# Create a user account
useradd --shell /bin/bash --create-home rss-bridge
cd /var/www
@@ -101,9 +102,13 @@ Nginx config:
server {
listen 80;
# TODO: change to your own server name
server_name example.com;
access_log /var/log/nginx/rss-bridge.access.log;
error_log /var/log/nginx/rss-bridge.error.log;
log_not_found off;
# Intentionally not setting a root folder here
@@ -115,23 +120,22 @@ server {
alias /var/www/rss-bridge/static/;
}
# Pass off to php-fpm only when location is exactly /
# Pass off to php-fpm when location is exactly /
location = / {
root /var/www/rss-bridge/;
include snippets/fastcgi-php.conf;
fastcgi_read_timeout 45s;
fastcgi_pass unix:/run/php/rss-bridge.sock;
}
# Reduce spam
location = /favicon.ico {
access_log off;
log_not_found off;
}
# Reduce spam
location = /robots.txt {
access_log off;
log_not_found off;
}
}
```
@@ -150,8 +154,11 @@ listen = /run/php/rss-bridge.sock
listen.owner = www-data
listen.group = www-data
# Create 10 workers standing by to serve requests
pm = static
pm.max_children = 10
# Respawn worker after 500 requests (workaround for memory leaks etc.)
pm.max_requests = 500
```
@@ -167,12 +174,10 @@ Restart fpm and nginx:
```shell
# Lint and restart php-fpm
php-fpm8.2 -t
systemctl restart php8.2-fpm
php-fpm8.2 -t && systemctl restart php8.2-fpm
# Lint and restart nginx
nginx -t
systemctl restart nginx
nginx -t && systemctl restart nginx
```
### How to install from Composer
@@ -181,7 +186,7 @@ Install the latest release.
```shell
cd /var/www
composer create-project -v --no-dev rss-bridge/rss-bridge
composer create-project -v --no-dev --no-scripts rss-bridge/rss-bridge
```
### How to install with Caddy
@@ -194,8 +199,16 @@ Install by downloading the docker image from Docker Hub:
```bash
# Create container
docker create --name=rss-bridge --publish 3000:80 rssbridge/rss-bridge
docker create --name=rss-bridge --publish 3000:80 --volume $(pwd)/config:/config rssbridge/rss-bridge
```
You can put custom `config.ini.php` and bridges into `./config`.
**You must restart container for custom changes to take effect.**
See `docker-entrypoint.sh` for details.
```bash
# Start container
docker start rss-bridge
```
@@ -209,30 +222,29 @@ Browse http://localhost:3000/
docker build -t rss-bridge .
# Create container
docker create --name rss-bridge --publish 3000:80 rss-bridge
docker create --name rss-bridge --publish 3000:80 --volume $(pwd)/config:/config rss-bridge
```
You can put custom `config.ini.php` and bridges into `./config`.
**You must restart container for custom changes to take effect.**
See `docker-entrypoint.sh` for details.
```bash
# Start container
docker start rss-bridge
```
Browse http://localhost:3000/
### Install with docker-compose
### Install with docker-compose (using Docker Hub)
Create a `docker-compose.yml` file locally with the following content:
```yml
version: '2'
services:
rss-bridge:
image: rssbridge/rss-bridge:latest
volumes:
- </local/custom/path>:/config
ports:
- 3000:80
restart: unless-stopped
```
You can put custom `config.ini.php` and bridges into `./config`.
Then launch with `docker-compose`:
**You must restart container for custom changes to take effect.**
See `docker-entrypoint.sh` for details.
```bash
docker-compose up
@@ -420,7 +432,16 @@ See `formats/PlaintextFormat.php` for an example.
These commands require that you have installed the dev dependencies in `composer.json`.
Run all tests:
./vendor/bin/phpunit
Run a single test class:
./vendor/bin/phpunit --filter UrlTest
Run linter:
./vendor/bin/phpcs --standard=phpcs.xml --warning-severity=0 --extensions=php -p ./
https://github.com/squizlabs/PHP_CodeSniffer/wiki

View File

@@ -14,12 +14,13 @@ class ConnectivityAction implements ActionInterface
{
private BridgeFactory $bridgeFactory;
public function __construct()
{
$this->bridgeFactory = new BridgeFactory();
public function __construct(
BridgeFactory $bridgeFactory
) {
$this->bridgeFactory = $bridgeFactory;
}
public function execute(Request $request)
public function __invoke(Request $request): Response
{
if (!Debug::isEnabled()) {
return new Response('This action is only available in debug mode!', 403);
@@ -27,7 +28,7 @@ class ConnectivityAction implements ActionInterface
$bridgeName = $request->get('bridge');
if (!$bridgeName) {
return render_template('connectivity.html.php');
return new Response(render_template('connectivity.html.php'));
}
$bridgeClassName = $this->bridgeFactory->createBridgeClassName($bridgeName);
if (!$bridgeClassName) {
@@ -54,8 +55,8 @@ class ConnectivityAction implements ActionInterface
];
try {
$response = getContents($bridge::URI, [], $curl_opts, true);
$result['http_code'] = $response['code'];
if (in_array($response['code'], [200])) {
$result['http_code'] = $response->getCode();
if (in_array($result['http_code'], [200])) {
$result['successful'] = true;
}
} catch (\Exception $e) {

View File

@@ -2,7 +2,15 @@
class DetectAction implements ActionInterface
{
public function execute(Request $request)
private BridgeFactory $bridgeFactory;
public function __construct(
BridgeFactory $bridgeFactory
) {
$this->bridgeFactory = $bridgeFactory;
}
public function __invoke(Request $request): Response
{
$url = $request->get('url');
$format = $request->get('format');
@@ -14,14 +22,12 @@ class DetectAction implements ActionInterface
return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'You must specify a format']));
}
$bridgeFactory = new BridgeFactory();
foreach ($bridgeFactory->getBridgeClassNames() as $bridgeClassName) {
if (!$bridgeFactory->isEnabled($bridgeClassName)) {
foreach ($this->bridgeFactory->getBridgeClassNames() as $bridgeClassName) {
if (!$this->bridgeFactory->isEnabled($bridgeClassName)) {
continue;
}
$bridge = $bridgeFactory->create($bridgeClassName);
$bridge = $this->bridgeFactory->create($bridgeClassName);
$bridgeParams = $bridge->detectParameters($url);

View File

@@ -4,42 +4,28 @@ class DisplayAction implements ActionInterface
{
private CacheInterface $cache;
private Logger $logger;
private BridgeFactory $bridgeFactory;
public function __construct()
{
$this->cache = RssBridge::getCache();
$this->logger = RssBridge::getLogger();
public function __construct(
CacheInterface $cache,
Logger $logger,
BridgeFactory $bridgeFactory
) {
$this->cache = $cache;
$this->logger = $logger;
$this->bridgeFactory = $bridgeFactory;
}
public function execute(Request $request)
public function __invoke(Request $request): Response
{
$bridgeName = $request->get('bridge');
$format = $request->get('format');
$noproxy = $request->get('_noproxy');
$cacheKey = 'http_' . json_encode($request->toArray());
/** @var Response $cachedResponse */
$cachedResponse = $this->cache->get($cacheKey);
if ($cachedResponse) {
$ifModifiedSince = $_SERVER['HTTP_IF_MODIFIED_SINCE'] ?? null;
$lastModified = $cachedResponse->getHeader('last-modified');
if ($ifModifiedSince && $lastModified) {
$lastModified = new \DateTimeImmutable($lastModified);
$lastModifiedTimestamp = $lastModified->getTimestamp();
$modifiedSince = strtotime($ifModifiedSince);
if ($lastModifiedTimestamp <= $modifiedSince) {
$modificationTimeGMT = gmdate('D, d M Y H:i:s ', $lastModifiedTimestamp);
return new Response('', 304, ['last-modified' => $modificationTimeGMT . 'GMT']);
}
}
return $cachedResponse;
}
if (!$bridgeName) {
return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'Missing bridge parameter']), 400);
}
$bridgeFactory = new BridgeFactory();
$bridgeClassName = $bridgeFactory->createBridgeClassName($bridgeName);
$bridgeClassName = $this->bridgeFactory->createBridgeClassName($bridgeName);
if (!$bridgeClassName) {
return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'Bridge not found']), 404);
}
@@ -47,11 +33,11 @@ class DisplayAction implements ActionInterface
if (!$format) {
return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'You must specify a format']), 400);
}
if (!$bridgeFactory->isEnabled($bridgeClassName)) {
if (!$this->bridgeFactory->isEnabled($bridgeClassName)) {
return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'This bridge is not whitelisted']), 400);
}
// Disable proxy (if enabled and per user's request)
if (
Configuration::getConfig('proxy', 'url')
&& Configuration::getConfig('proxy', 'by_bridge')
@@ -61,9 +47,9 @@ class DisplayAction implements ActionInterface
define('NOPROXY', true);
}
$bridge = $bridgeFactory->create($bridgeClassName);
$formatFactory = new FormatFactory();
$format = $formatFactory->create($format);
$cacheKey = 'http_' . json_encode($request->toArray());
$bridge = $this->bridgeFactory->create($bridgeClassName);
$response = $this->createResponse($request, $bridge, $format);
@@ -77,26 +63,12 @@ class DisplayAction implements ActionInterface
$this->cache->set($cacheKey, $response, $ttl);
}
if (in_array($response->getCode(), [403, 429, 503])) {
// Cache these responses for about ~20 mins on average
$this->cache->set($cacheKey, $response, 60 * 15 + rand(1, 60 * 10));
}
if ($response->getCode() === 500) {
$this->cache->set($cacheKey, $response, 60 * 15);
}
if (rand(1, 100) === 2) {
$this->cache->prune();
}
return $response;
}
private function createResponse(Request $request, BridgeAbstract $bridge, FormatAbstract $format)
private function createResponse(Request $request, BridgeAbstract $bridge, string $format)
{
$items = [];
$feed = [];
try {
$bridge->loadConfiguration();
@@ -116,28 +88,23 @@ class DisplayAction implements ActionInterface
$bridge->setInput($input);
$bridge->collectData();
$items = $bridge->getItems();
if (isset($items[0]) && is_array($items[0])) {
$feedItems = [];
foreach ($items as $item) {
$feedItems[] = FeedItem::fromArray($item);
}
$items = $feedItems;
} catch (\Throwable $e) {
if ($e instanceof RateLimitException) {
// These are internally generated by bridges
$this->logger->info(sprintf('RateLimitException in DisplayAction(%s): %s', $bridge->getShortName(), create_sane_exception_message($e)));
return new Response(render(__DIR__ . '/../templates/exception.html.php', ['e' => $e]), 429);
}
$feed = $bridge->getFeed();
} catch (\Exception $e) {
// Probably an exception inside a bridge
if ($e instanceof HttpException) {
// Reproduce (and log) these responses regardless of error output and report limit
if ($e->getCode() === 429) {
$this->logger->info(sprintf('Exception in DisplayAction(%s): %s', $bridge->getShortName(), create_sane_exception_message($e)));
return new Response(render(__DIR__ . '/../templates/exception.html.php', ['e' => $e]), 429);
}
if ($e->getCode() === 503) {
$this->logger->info(sprintf('Exception in DisplayAction(%s): %s', $bridge->getShortName(), create_sane_exception_message($e)));
return new Response(render(__DIR__ . '/../templates/exception.html.php', ['e' => $e]), 503);
if (in_array($e->getCode(), [429, 503])) {
// Log with debug, immediately reproduce and return
$this->logger->debug(sprintf('Exception in DisplayAction(%s): %s', $bridge->getShortName(), create_sane_exception_message($e)));
return new Response(render(__DIR__ . '/../templates/exception.html.php', ['e' => $e]), $e->getCode());
}
// Some other status code which we let fail normally (but don't log it)
} else {
// Log error if it's not an HttpException
$this->logger->error(sprintf('Exception in DisplayAction(%s)', $bridge->getShortName()), ['e' => $e]);
}
$this->logger->error(sprintf('Exception in DisplayAction(%s)', $bridge->getShortName()), ['e' => $e]);
$errorOutput = Configuration::getConfig('error', 'output');
$reportLimit = Configuration::getConfig('error', 'report_limit');
$errorCount = 1;
@@ -148,7 +115,7 @@ class DisplayAction implements ActionInterface
if ($errorCount >= $reportLimit) {
if ($errorOutput === 'feed') {
// Render the exception as a feed item
$items[] = $this->createFeedItemFromException($e, $bridge);
$items = [$this->createFeedItemFromException($e, $bridge)];
} elseif ($errorOutput === 'http') {
return new Response(render(__DIR__ . '/../templates/exception.html.php', ['e' => $e]), 500);
} elseif ($errorOutput === 'none') {
@@ -157,38 +124,49 @@ class DisplayAction implements ActionInterface
}
}
$formatFactory = new FormatFactory();
$format = $formatFactory->create($format);
$format->setItems($items);
$format->setFeed($feed);
$format->setFeed($bridge->getFeed());
$now = time();
$format->setLastModified($now);
$headers = [
'last-modified' => gmdate('D, d M Y H:i:s ', $now) . 'GMT',
'content-type' => $format->getMimeType() . '; charset=' . $format->getCharset(),
'content-type' => $format->getMimeType() . '; charset=UTF-8',
];
return new Response($format->stringify(), 200, $headers);
$body = $format->render();
// This is supposed to remove non-utf8 byte sequences, but I'm unsure if it works
ini_set('mbstring.substitute_character', 'none');
$body = mb_convert_encoding($body, 'UTF-8', 'UTF-8');
return new Response($body, 200, $headers);
}
private function createFeedItemFromException($e, BridgeAbstract $bridge): FeedItem
private function createFeedItemFromException($e, BridgeAbstract $bridge): array
{
$item = new FeedItem();
$item = [];
// Create a unique identifier every 24 hours
$uniqueIdentifier = urlencode((int)(time() / 86400));
$title = sprintf('Bridge returned error %s! (%s)', $e->getCode(), $uniqueIdentifier);
$item->setTitle($title);
$item->setURI(get_current_url());
$item->setTimestamp(time());
$item['title'] = $title;
$item['uri'] = get_current_url();
$item['timestamp'] = time();
// Create an item identifier for feed readers e.g. "staysafetv twitch videos_19389"
$item->setUid($bridge->getName() . '_' . $uniqueIdentifier);
$item['uid'] = $bridge->getName() . '_' . $uniqueIdentifier;
$content = render_template(__DIR__ . '/../templates/bridge-error.html.php', [
'error' => render_template(__DIR__ . '/../templates/exception.html.php', ['e' => $e]),
'searchUrl' => self::createGithubSearchUrl($bridge),
'issueUrl' => self::createGithubIssueUrl($bridge, $e, create_sane_exception_message($e)),
'issueUrl' => self::createGithubIssueUrl($bridge, $e),
'maintainer' => $bridge->getMaintainer(),
]);
$item->setContent($content);
$item['content'] = $content;
return $item;
}
@@ -213,22 +191,34 @@ class DisplayAction implements ActionInterface
return $report['count'];
}
private static function createGithubIssueUrl($bridge, $e, string $message): string
private static function createGithubIssueUrl(BridgeAbstract $bridge, \Throwable $e): string
{
return sprintf('https://github.com/RSS-Bridge/rss-bridge/issues/new?%s', http_build_query([
'title' => sprintf('%s failed with error %s', $bridge->getName(), $e->getCode()),
$maintainer = $bridge->getMaintainer();
if (str_contains($maintainer, ',')) {
$maintainers = explode(',', $maintainer);
} else {
$maintainers = [$maintainer];
}
$maintainers = array_map('trim', $maintainers);
$queryString = $_SERVER['QUERY_STRING'] ?? '';
$query = [
'title' => $bridge->getName() . ' failed with: ' . $e->getMessage(),
'body' => sprintf(
"```\n%s\n\n%s\n\nQuery string: %s\nVersion: %s\nOs: %s\nPHP version: %s\n```",
$message,
"```\n%s\n\n%s\n\nQuery string: %s\nVersion: %s\nOs: %s\nPHP version: %s\n```\nMaintainer: @%s",
create_sane_exception_message($e),
implode("\n", trace_to_call_points(trace_from_exception($e))),
$_SERVER['QUERY_STRING'] ?? '',
$queryString,
Configuration::getVersion(),
PHP_OS_FAMILY,
phpversion() ?: 'Unknown'
phpversion() ?: 'Unknown',
implode(', @', $maintainers),
),
'labels' => 'Bridge-Broken',
'assignee' => $bridge->getMaintainer(),
]));
'assignee' => $maintainers[0],
];
return 'https://github.com/RSS-Bridge/rss-bridge/issues/new?' . http_build_query($query);
}
private static function createGithubSearchUrl($bridge): string

View File

@@ -7,7 +7,15 @@
*/
class FindfeedAction implements ActionInterface
{
public function execute(Request $request)
private BridgeFactory $bridgeFactory;
public function __construct(
BridgeFactory $bridgeFactory
) {
$this->bridgeFactory = $bridgeFactory;
}
public function __invoke(Request $request): Response
{
$url = $request->get('url');
$format = $request->get('format');
@@ -19,15 +27,13 @@ class FindfeedAction implements ActionInterface
return new Response('You must specify a format', 400);
}
$bridgeFactory = new BridgeFactory();
$results = [];
foreach ($bridgeFactory->getBridgeClassNames() as $bridgeClassName) {
if (!$bridgeFactory->isEnabled($bridgeClassName)) {
foreach ($this->bridgeFactory->getBridgeClassNames() as $bridgeClassName) {
if (!$this->bridgeFactory->isEnabled($bridgeClassName)) {
continue;
}
$bridge = $bridgeFactory->create($bridgeClassName);
$bridge = $this->bridgeFactory->create($bridgeClassName);
$bridgeParams = $bridge->detectParameters($url);

View File

@@ -2,15 +2,24 @@
final class FrontpageAction implements ActionInterface
{
public function execute(Request $request)
private BridgeFactory $bridgeFactory;
public function __construct(
BridgeFactory $bridgeFactory
) {
$this->bridgeFactory = $bridgeFactory;
}
public function __invoke(Request $request): Response
{
$token = $request->attribute('token');
$messages = [];
$activeBridges = 0;
$bridgeFactory = new BridgeFactory();
$bridgeClassNames = $bridgeFactory->getBridgeClassNames();
$bridgeClassNames = $this->bridgeFactory->getBridgeClassNames();
foreach ($bridgeFactory->getMissingEnabledBridges() as $missingEnabledBridge) {
foreach ($this->bridgeFactory->getMissingEnabledBridges() as $missingEnabledBridge) {
$messages[] = [
'body' => sprintf('Warning : Bridge "%s" not found', $missingEnabledBridge),
'level' => 'warning'
@@ -19,20 +28,22 @@ final class FrontpageAction implements ActionInterface
$body = '';
foreach ($bridgeClassNames as $bridgeClassName) {
if ($bridgeFactory->isEnabled($bridgeClassName)) {
$body .= BridgeCard::render($bridgeClassName, $request);
if ($this->bridgeFactory->isEnabled($bridgeClassName)) {
$body .= BridgeCard::render($this->bridgeFactory, $bridgeClassName, $token);
$activeBridges++;
}
}
// todo: cache this renderered template?
return render(__DIR__ . '/../templates/frontpage.html.php', [
'messages' => $messages,
'admin_email' => Configuration::getConfig('admin', 'email'),
'admin_telegram' => Configuration::getConfig('admin', 'telegram'),
'bridges' => $body,
'active_bridges' => $activeBridges,
'total_bridges' => count($bridgeClassNames),
]);
$response = new Response(render(__DIR__ . '/../templates/frontpage.html.php', [
'messages' => $messages,
'admin_email' => Configuration::getConfig('admin', 'email'),
'admin_telegram' => Configuration::getConfig('admin', 'telegram'),
'bridges' => $body,
'active_bridges' => $activeBridges,
'total_bridges' => count($bridgeClassNames),
]));
// TODO: The rendered template could be cached, but beware of config changes that change the html
return $response;
}
}

View File

@@ -4,7 +4,7 @@ declare(strict_types=1);
class HealthAction implements ActionInterface
{
public function execute(Request $request)
public function __invoke(Request $request): Response
{
$response = [
'code' => 200,

View File

@@ -2,19 +2,25 @@
class ListAction implements ActionInterface
{
public function execute(Request $request)
private BridgeFactory $bridgeFactory;
public function __construct(
BridgeFactory $bridgeFactory
) {
$this->bridgeFactory = $bridgeFactory;
}
public function __invoke(Request $request): Response
{
$list = new \stdClass();
$list->bridges = [];
$list->total = 0;
$bridgeFactory = new BridgeFactory();
foreach ($bridgeFactory->getBridgeClassNames() as $bridgeClassName) {
$bridge = $bridgeFactory->create($bridgeClassName);
foreach ($this->bridgeFactory->getBridgeClassNames() as $bridgeClassName) {
$bridge = $this->bridgeFactory->create($bridgeClassName);
$list->bridges[$bridgeClassName] = [
'status' => $bridgeFactory->isEnabled($bridgeClassName) ? 'active' : 'inactive',
'status' => $this->bridgeFactory->isEnabled($bridgeClassName) ? 'active' : 'inactive',
'uri' => $bridge->getURI(),
'donationUri' => $bridge->getDonationURI(),
'name' => $bridge->getName(),

View File

@@ -6,9 +6,11 @@
*/
require __DIR__ . '/../lib/bootstrap.php';
require __DIR__ . '/../lib/config.php';
$rssBridge = new RssBridge();
$container = require __DIR__ . '/../lib/dependencies.php';
$cache = RssBridge::getCache();
/** @var CacheInterface $cache */
$cache = $container['cache'];
$cache->clear();

View File

@@ -6,9 +6,19 @@
*/
require __DIR__ . '/../lib/bootstrap.php';
require __DIR__ . '/../lib/config.php';
$rssBridge = new RssBridge();
$container = require __DIR__ . '/../lib/dependencies.php';
$cache = RssBridge::getCache();
if (
Configuration::getConfig('cache', 'type') === 'file'
&& !Configuration::getConfig('FileCache', 'enable_purge')
) {
// Override enable_purge for this particular execution
Configuration::setConfig('FileCache', 'enable_purge', true);
}
/** @var CacheInterface $cache */
$cache = $container['cache'];
$cache->prune();

20
bin/test Executable file
View File

@@ -0,0 +1,20 @@
#!/usr/bin/env php
<?php

/**
 * Write one test record at each of the debug, info and error log levels.
 *
 * Intended for manually checking that the configured logger emits records.
 */

require __DIR__ . '/../lib/bootstrap.php';
require __DIR__ . '/../lib/config.php';

$container = require __DIR__ . '/../lib/dependencies.php';

/** @var Logger $logger */
$logger = $container['logger'];

// Logger method name => message to emit, in the order the records are written
$records = [
    'debug' => 'This is a test debug message',
    'info' => 'This is a test info message',
    'error' => 'This is a test error message',
];

foreach ($records as $level => $message) {
    $logger->{$level}($message);
}

View File

@@ -31,17 +31,17 @@ class ABCNewsBridge extends BridgeAbstract
{
$url = sprintf('https://www.abc.net.au/news/%s', $this->getInput('topic'));
$dom = getSimpleHTMLDOM($url);
$dom = $dom->find('div[data-component="CardList"]', 0);
$dom = $dom->find('div[data-component="PaginationList"]', 0);
if (!$dom) {
throw new \Exception(sprintf('Unable to find css selector on `%s`', $url));
}
$dom = defaultLinkTo($dom, $this->getURI());
foreach ($dom->find('div[data-component="GenericCard"]') as $article) {
foreach ($dom->find('article[data-component="DetailCard"]') as $article) {
$a = $article->find('a', 0);
$this->items[] = [
'title' => $a->plaintext,
'uri' => $a->href,
'content' => $article->find('[data-component="CardDescription"]', 0)->plaintext,
'content' => $article->find('p', 0)->plaintext,
'timestamp' => strtotime($article->find('time', 0)->datetime),
];
}

View File

@@ -12,9 +12,22 @@ class AO3Bridge extends BridgeAbstract
'url' => [
'name' => 'url',
'required' => true,
// Example: F/F tag, complete works only
'exampleValue' => 'https://archiveofourown.org/works?work_search[complete]=T&tag_id=F*s*F',
// Example: F/F tag
'exampleValue' => 'https://archiveofourown.org/tags/F*s*F/works',
],
'range' => [
'name' => 'Chapter Content',
'title' => 'Chapter(s) to include in each work\'s feed entry',
'defaultValue' => null,
'type' => 'list',
'values' => [
'None' => null,
'First' => 'first',
'Latest' => 'last',
'Entire work' => 'all',
],
],
'limit' => self::LIMIT,
],
'Bookmarks' => [
'user' => [
@@ -39,18 +52,13 @@ class AO3Bridge extends BridgeAbstract
{
switch ($this->queriedContext) {
case 'Bookmarks':
$user = $this->getInput('user');
$this->title = $user;
$url = self::URI
. '/users/' . $user
. '/bookmarks?bookmark_search[sort_column]=bookmarkable_date';
$this->collectList($url);
$this->collectList($this->getURI());
break;
case 'List':
$this->collectList($this->getInput('url'));
$this->collectList($this->getURI());
break;
case 'Work':
$this->collectWork($this->getInput('id'));
$this->collectWork($this->getURI());
break;
}
}
@@ -61,9 +69,24 @@ class AO3Bridge extends BridgeAbstract
*/
private function collectList($url)
{
$html = getSimpleHTMLDOM($url);
$version = 'v0.0.1';
$headers = [
"useragent: rss-bridge $version (https://github.com/RSS-Bridge/rss-bridge)"
];
$response = getContents($url, $headers);
$html = \str_get_html($response);
$html = defaultLinkTo($html, self::URI);
// Get list title. Will include page range + count in some cases
$heading = ($html->find('#main h2', 0));
if ($heading->find('a.tag')) {
$heading = $heading->find('a.tag', 0);
}
$this->title = $heading->plaintext;
$limit = $this->getInput('limit') ?? 3;
$count = 0;
foreach ($html->find('.index.group > li') as $element) {
$item = [];
@@ -72,17 +95,66 @@ class AO3Bridge extends BridgeAbstract
continue; // discard deleted works
}
$item['title'] = $title->plaintext;
$item['content'] = $element;
$item['uri'] = $title->href;
$strdate = $element->find('div p.datetime', 0)->plaintext;
$item['timestamp'] = strtotime($strdate);
// detach from rest of page because remove() is buggy
$element = str_get_html($element->outertext());
$tags = $element->find('ul.required-tags', 0);
foreach ($tags->childNodes() as $tag) {
$item['categories'][] = html_entity_decode($tag->plaintext);
}
$tags->remove();
$tags = $element->find('ul.tags', 0);
foreach ($tags->childNodes() as $tag) {
$item['categories'][] = html_entity_decode($tag->plaintext);
}
$tags->remove();
$item['content'] = implode('', $element->childNodes());
$chapters = $element->find('dl dd.chapters', 0);
// bookmarked series and external works do not have a chapters count
$chapters = (isset($chapters) ? $chapters->plaintext : 0);
$item['uid'] = $item['uri'] . "/$strdate/$chapters";
// Fetch workskin of desired chapter(s) in list
if ($this->getInput('range') && ($limit == 0 || $count++ < $limit)) {
$url = $item['uri'];
switch ($this->getInput('range')) {
case ('all'):
$url .= '?view_full_work=true';
break;
case ('first'):
break;
case ('last'):
// only way to get this is using the navigate page unfortunately
$url .= '/navigate';
$response = getContents($url, $headers);
$html = \str_get_html($response);
$html = defaultLinkTo($html, self::URI);
$url = $html->find('ol.index.group > li > a', -1)->href;
break;
}
$response = getContents($url, $headers);
$html = \str_get_html($response);
$html = defaultLinkTo($html, self::URI);
// remove duplicate fic summary
if ($ficsum = $html->find('#workskin > .preface > .summary', 0)) {
$ficsum->remove();
}
$item['content'] .= $html->find('#workskin', 0);
}
// Use predictability of download links to generate enclosures
$wid = explode('/', $item['uri'])[4];
foreach (['azw3', 'epub', 'mobi', 'pdf', 'html'] as $ext) {
$item['enclosures'][] = 'https://archiveofourown.org/downloads/' . $wid . '/work.' . $ext;
}
$this->items[] = $item;
}
}
@@ -90,26 +162,31 @@ class AO3Bridge extends BridgeAbstract
/**
* Feed for recent chapters of a specific work.
*/
private function collectWork($id)
private function collectWork($url)
{
$url = self::URI . "/works/$id/navigate";
$httpClient = RssBridge::getHttpClient();
$version = 'v0.0.1';
$response = $httpClient->request($url, [
'useragent' => "rss-bridge $version (https://github.com/RSS-Bridge/rss-bridge)",
]);
$headers = [
"useragent: rss-bridge $version (https://github.com/RSS-Bridge/rss-bridge)"
];
$response = getContents($url . '/navigate', $headers);
$html = \str_get_html($response->getBody());
$html = \str_get_html($response);
$html = defaultLinkTo($html, self::URI);
$response = getContents($url . '?view_full_work=true', $headers);
$workhtml = \str_get_html($response);
$workhtml = defaultLinkTo($workhtml, self::URI);
$this->title = $html->find('h2 a', 0)->plaintext;
foreach ($html->find('ol.index.group > li') as $element) {
$nav = $html->find('ol.index.group > li');
for ($i = 0; $i < count($nav); $i++) {
$item = [];
$element = $nav[$i];
$item['title'] = $element->find('a', 0)->plaintext;
$item['content'] = $element;
$item['content'] = $workhtml->find('#chapter-' . ($i + 1), 0);
$item['uri'] = $element->find('a', 0)->href;
$strdate = $element->find('span.datetime', 0)->plaintext;
@@ -138,4 +215,24 @@ class AO3Bridge extends BridgeAbstract
{
return self::URI . '/favicon.ico';
}
/**
 * Resolve the URL for the queried context.
 *
 * - Bookmarks: the user's bookmarks page, sorted by bookmarkable date
 * - List: the URL supplied in the `url` input
 * - Work: the work page for the `id` input
 *
 * Any other context yields the parent implementation's URI.
 *
 * @return string
 */
public function getURI()
{
    $fallback = parent::getURI();

    switch ($this->queriedContext) {
        case 'Bookmarks':
            $user = $this->getInput('user');
            return self::URI . '/users/' . $user . '/bookmarks?bookmark_search[sort_column]=bookmarkable_date';
        case 'List':
            return $this->getInput('url');
        case 'Work':
            return self::URI . '/works/' . $this->getInput('id');
        default:
            return $fallback;
    }
}
}

View File

@@ -40,6 +40,11 @@ class ARDMediathekBridge extends BridgeAbstract
* @const IMAGEWIDTHPLACEHOLDER
*/
const IMAGEWIDTHPLACEHOLDER = '{width}';
/**
* Title of the current show
* @var string
*/
private $title;
const PARAMETERS = [
[
@@ -72,7 +77,7 @@ class ARDMediathekBridge extends BridgeAbstract
}
}
$url = self::APIENDPOINT . $showID . '/?pageSize=' . self::PAGESIZE;
$url = self::APIENDPOINT . $showID . '?pageSize=' . self::PAGESIZE;
$rawJSON = getContents($url);
$processedJSON = json_decode($rawJSON);
@@ -93,6 +98,17 @@ class ARDMediathekBridge extends BridgeAbstract
$this->items[] = $item;
}
$this->title = $processedJSON->title;
date_default_timezone_set($oldTz);
}
/**
 * {@inheritdoc}
 *
 * Returns the stored show title when it is non-empty, otherwise the
 * parent's name.
 */
public function getName()
{
    return empty($this->title) ? parent::getName() : $this->title;
}
}

View File

@@ -0,0 +1,45 @@
<?php
/**
 * Bridge for the Activision Research blog.
 *
 * Reads the blog listing on the landing page and loads every linked article
 * to build feed items with the full article body.
 */
class ActivisionResearchBridge extends BridgeAbstract
{
    const NAME = 'Activision Research Blog';
    const URI = 'https://research.activision.com';
    const DESCRIPTION = 'Posts from the Activision Research blog';
    const MAINTAINER = 'thefranke';
    const CACHE_TIMEOUT = 86400; // 24h

    /**
     * Collect one item per `div.blog-entry` found in the `#home-blog-feed` container.
     *
     * @throws \Exception when the blog feed container is not present on the page
     */
    public function collectData()
    {
        $dom = getSimpleHTMLDOM(static::URI);
        $dom = $dom->find('div[id="home-blog-feed"]', 0);
        if (!$dom) {
            // Report the URL that was actually fetched (the previous code used an undefined variable).
            throw new \Exception(sprintf('Unable to find css selector on `%s`', static::URI));
        }
        $dom = defaultLinkTo($dom, $this->getURI());

        foreach ($dom->find('div[class="blog-entry"]') as $article) {
            $a = $article->find('a', 0);

            // The teaser image is only available as a CSS background: url(...)
            $blogimg = extractFromDelimiters($article->find('div[class="blog-img"]', 0)->style, 'url(', ')');

            $title = htmlspecialchars_decode($article->find('div[class="title"]', 0)->plaintext);
            $author = htmlspecialchars_decode($article->find('div[class="author"]', 0)->plaintext);
            $date = $article->find('div[class="pubdate"]', 0)->plaintext;

            // Article pages are cached for 28 times the bridge timeout.
            $entry = getSimpleHTMLDOMCached($a->href, static::CACHE_TIMEOUT * 7 * 4);
            $entry = defaultLinkTo($entry, $this->getURI());

            $content = $entry->find('div[class="blog-body"]', 0);

            $tagsremove = ['script', 'iframe', 'input', 'form'];
            $content = sanitize($content, $tagsremove);
            $content = '<img src="' . static::URI . $blogimg . '" alt="">' . $content;

            $this->items[] = [
                'title' => $title,
                'author' => $author,
                'uri' => $a->href,
                'content' => $content,
                'timestamp' => strtotime($date),
            ];
        }
    }
}

View File

@@ -13,12 +13,9 @@ class AllegroBridge extends BridgeAbstract
'exampleValue' => 'https://allegro.pl/kategoria/swieze-warzywa-cebula-318660',
'required' => true,
],
'sessioncookie' => [
'name' => 'The \'wdctx\' session cookie',
'title' => 'Paste the value of the \'wdctx\' cookie from your browser if you want to prevent Allegro imposing rate limits',
'pattern' => '^.{70,};?$',
// phpcs:ignore
'exampleValue' => 'v4.1-oCrmXTMqv2ppC21GTUCKLmUwRPP1ssQVALKuqwsZ1VXjcKgL2vO5TTRM5xMxS9GiyqxF1gAeyc-63dl0coUoBKXCXi_nAmr95yyqGpq2RAFoneZ4L399E8n6iYyemcuGARjAoSfjvLHJCEwvvHHynSgaxlFBu7hUnKfuy39zo9sSQdyTUjotJg3CAZ53q9v2raAnPCyGOAR4ytRILd9p24EJnxp7_oR0XbVPIo1hDa4WmjXFOxph8rHaO5tWd',
'cookie' => [
'name' => 'The complete cookie value',
'title' => 'Paste the value of the cookie value from your browser if you want to prevent Allegro imposing rate limits',
'required' => false,
],
'includeSponsoredOffers' => [
@@ -70,9 +67,9 @@ class AllegroBridge extends BridgeAbstract
$opts = [];
// If a session cookie is provided
if ($sessioncookie = $this->getInput('sessioncookie')) {
$opts[CURLOPT_COOKIE] = 'wdctx=' . $sessioncookie;
// If a cookie is provided
if ($cookie = $this->getInput('cookie')) {
$opts[CURLOPT_COOKIE] = $cookie;
}
$html = getSimpleHTMLDOM($url, [], $opts);
@@ -84,11 +81,11 @@ class AllegroBridge extends BridgeAbstract
$results = $html->find('article[data-analytics-view-custom-context="REGULAR"]');
if (!$this->getInput('includeSponsoredOffers')) {
if ($this->getInput('includeSponsoredOffers')) {
$results = array_merge($results, $html->find('article[data-analytics-view-custom-context="SPONSORED"]'));
}
if (!$this->getInput('includePromotedOffers')) {
if ($this->getInput('includePromotedOffers')) {
$results = array_merge($results, $html->find('article[data-analytics-view-custom-context="PROMOTED"]'));
}

278
bridges/AnfrBridge.php Normal file
View File

@@ -0,0 +1,278 @@
<?php
class AnfrBridge extends BridgeAbstract
{
const NAME = 'ANFR';
const URI = 'https://data.anfr.fr/';
const DESCRIPTION = 'Fetches data from the French administration "Agence Nationale des Fréquences".';
const CACHE_TIMEOUT = 604800; // 7d
const MAINTAINER = 'quent1';
const PARAMETERS = [
'Données sur les réseaux mobiles' => [
'departement' => [
'name' => 'Département',
'type' => 'list',
'values' => [
'Tous' => null,
'Ain' => '001',
'Aisne' => '002',
'Allier' => '003',
'Alpes-de-Haute-Provence' => '004',
'Hautes-Alpes' => '005',
'Alpes-Maritimes' => '006',
'Ardèche' => '007',
'Ardennes' => '008',
'Ariège' => '009',
'Aube' => '010',
'Aude' => '011',
'Aveyron' => '012',
'Bouches-du-Rhône' => '013',
'Calvados' => '014',
'Cantal' => '015',
'Charente' => '016',
'Charente-Maritime' => '017',
'Cher' => '018',
'Corrèze' => '019',
'Corse-du-Sud' => '02A',
'Haute-Corse' => '02B',
'Côte-d\'Or' => '021',
'Côtes-d\'Armor' => '022',
'Creuse' => '023',
'Dordogne' => '024',
'Doubs' => '025',
'Drôme' => '026',
'Eure' => '027',
'Eure-et-Loir' => '028',
'Finistère' => '029',
'Gard' => '030',
'Haute-Garonne' => '031',
'Gers' => '032',
'Gironde' => '033',
'Hérault' => '034',
'Ille-et-Vilaine' => '035',
'Indre' => '036',
'Indre-et-Loire' => '037',
'Isère' => '038',
'Jura' => '039',
'Landes' => '040',
'Loir-et-Cher' => '041',
'Loire' => '042',
'Haute-Loire' => '043',
'Loire-Atlantique' => '044',
'Loiret' => '045',
'Lot' => '046',
'Lot-et-Garonne' => '047',
'Lozère' => '048',
'Maine-et-Loire' => '049',
'Manche' => '050',
'Marne' => '051',
'Haute-Marne' => '052',
'Mayenne' => '053',
'Meurthe-et-Moselle' => '054',
'Meuse' => '055',
'Morbihan' => '056',
'Moselle' => '057',
'Nièvre' => '058',
'Nord' => '059',
'Oise' => '060',
'Orne' => '061',
'Pas-de-Calais' => '062',
'Puy-de-Dôme' => '063',
'Pyrénées-Atlantiques' => '064',
'Hautes-Pyrénées' => '065',
'Pyrénées-Orientales' => '066',
'Bas-Rhin' => '067',
'Haut-Rhin' => '068',
'Rhône' => '069',
'Haute-Saône' => '070',
'Saône-et-Loire' => '071',
'Sarthe' => '072',
'Savoie' => '073',
'Haute-Savoie' => '074',
'Paris' => '075',
'Seine-Maritime' => '076',
'Seine-et-Marne' => '077',
'Yvelines' => '078',
'Deux-Sèvres' => '079',
'Somme' => '080',
'Tarn' => '081',
'Tarn-et-Garonne' => '082',
'Var' => '083',
'Vaucluse' => '084',
'Vendée' => '085',
'Vienne' => '086',
'Haute-Vienne' => '087',
'Vosges' => '088',
'Yonne' => '089',
'Territoire de Belfort' => '090',
'Essonne' => '091',
'Hauts-de-Seine' => '092',
'Seine-Saint-Denis' => '093',
'Val-de-Marne' => '094',
'Val-d\'Oise' => '095',
'Guadeloupe' => '971',
'Martinique' => '972',
'Guyane' => '973',
'La Réunion' => '974',
'Saint-Pierre-et-Miquelon' => '975',
'Mayotte' => '976',
'Saint-Barthélemy' => '977',
'Saint-Martin' => '978',
'Terres australes et antarctiques françaises' => '984',
'Wallis-et-Futuna' => '986',
'Polynésie française' => '987',
'Nouvelle-Calédonie' => '988',
'Île de Clipperton' => '989'
]
],
'generation' => [
'name' => 'Génération',
'type' => 'list',
'values' => [
'Tous' => null,
'2G' => '2G',
'3G' => '3G',
'4G' => '4G',
'5G' => '5G',
]
],
'operateur' => [
'name' => 'Opérateur',
'type' => 'list',
'values' => [
'Tous' => null,
'Bouygues Télécom' => 'BOUYGUES TELECOM',
'Dauphin Télécom' => 'DAUPHIN TELECOM',
'Digiciel' => 'DIGICEL',
'Free Caraïbes' => 'FREE CARAIBES',
'Free Mobile' => 'FREE MOBILE',
'GLOBALTEL' => 'GLOBALTEL',
'Office des postes et télécommunications de Nouvelle Calédonie' => 'Gouv Nelle Calédonie (OPT)',
'Maore Mobile' => 'MAORE MOBILE',
'ONATi' => 'ONATI',
'Orange' => 'ORANGE',
'Outremer Telecom' => 'OUTREMER TELECOM',
'Vodafone polynésie' => 'PMT/VODAPHONE',
'SFR' => 'SFR',
'SPM Télécom' => 'SPM TELECOM',
'Service des Postes et Télécommunications de Polynésie Française' => 'Gouv Nelle Calédonie (OPT)',
'SRR' => 'SRR',
'Station étrangère' => 'Station étrangère',
'Telco OI' => 'TELCO IO',
'United Telecommunication Services Caraïbes' => 'UTS Caraibes',
'Ora Mobile' => 'VITI SAS',
'Zeop' => 'ZEOP'
]
],
'statut' => [
'name' => 'Statut',
'type' => 'list',
'values' => [
'Tous' => null,
'En service' => 'En service',
'Projet approuvé' => 'Projet approuvé',
'Techniquement opérationnel' => 'Techniquement opérationnel',
]
]
]
];
/**
 * Download the station records matching the selected filters, group them
 * by station number and emit one item per station, most recently updated
 * station first.
 */
public function collectData()
{
    $query = [
        'id' => 'observatoire_2g_3g_4g',
        'resource_id' => '88ef0887-6b0f-4d3f-8545-6d64c8f597da',
        'fields' => 'id,adm_lb_nom,sta_nm_dpt,emr_lb_systeme,generation,date_maj,sta_nm_anfr,adr_lb_lieu,adr_lb_add1,adr_lb_add2,adr_lb_add3,adr_nm_cp,statut',
        'rows' => 10000
    ];

    // input name => [API refinement key, urlencode the value up front?]
    // Pre-encoded values get encoded a second time by http_build_query() and
    // decoded once by urldecode() below, so they reach the URL encoded exactly
    // once, while every other parameter ends up unencoded.
    $refinements = [
        'departement' => ['refine.sta_nm_dpt', true],
        'generation' => ['refine.generation', false],
        'operateur' => ['refine.adm_lb_nom', true],
        'statut' => ['refine.statut', true],
    ];
    foreach ($refinements as $inputName => [$apiKey, $preEncode]) {
        $value = $this->getInput($inputName);
        if (empty($value)) {
            continue;
        }
        $query[$apiKey] = $preEncode ? urlencode($value) : $value;
    }

    // API seems to not play well with urlencoded data
    $queryString = urldecode(http_build_query($query));
    $url = urljoin(static::URI, '/d4c/api/records/1.0/download/?' . $queryString);

    $payload = Json::decode(getContents($url), false);
    $records = $payload->records;

    // Group the per-frequency records by station number.
    $stations = [];
    foreach ($records as $record) {
        $fields = $record->fields;
        $stationId = $fields->sta_nm_anfr;

        if (!isset($stations[$stationId])) {
            $street = sprintf(
                '%s %s %s',
                $fields->adr_lb_add1 ?? '',
                $fields->adr_lb_add2 ?? '',
                $fields->adr_lb_add3 ?? ''
            );
            $stations[$stationId] = [
                'id' => $stationId,
                'operator' => $fields->adm_lb_nom,
                'frequencies' => [],
                'lastUpdate' => 0,
                'address' => [
                    'street' => trim($street),
                    'postCode' => $fields->adr_nm_cp,
                    'city' => $fields->adr_lb_lieu
                ]
            ];
        }

        $updatedAt = strtotime($fields->date_maj);
        $stations[$stationId]['frequencies'][] = [
            'generation' => $fields->generation,
            'frequency' => $fields->emr_lb_systeme,
            'status' => $fields->statut,
            'updatedAt' => $updatedAt,
        ];

        // A station is as fresh as its most recently updated frequency.
        $stations[$stationId]['lastUpdate'] = max($stations[$stationId]['lastUpdate'], $updatedAt);
    }

    // Newest first; usort() also re-indexes the station map into a list.
    usort($stations, static function ($left, $right) {
        return $right['lastUpdate'] <=> $left['lastUpdate'];
    });

    foreach ($stations as $station) {
        $address = $station['address'];

        $frequencyList = '';
        foreach ($station['frequencies'] as $frequency) {
            $frequencyList .= sprintf('<li>%s : %s</li>', $frequency['frequency'], $frequency['status']);
        }

        $this->items[] = [
            'uid' => $station['id'],
            'timestamp' => $station['lastUpdate'],
            'title' => sprintf(
                '[%s] Mise à jour de la station n°%s à %s (%s)',
                $station['operator'],
                $station['id'],
                $address['city'],
                $address['postCode']
            ),
            'content' => sprintf(
                '<h1>Adresse complète</h1><p>%s<br>%s<br>%s</p><h1>Fréquences</h1><p><ul>%s</ul></p>',
                $address['street'],
                $address['postCode'],
                $address['city'],
                $frequencyList
            ),
        ];
    }
}
}

View File

@@ -0,0 +1,87 @@
<?php
/**
 * Bridge for anisearch.de: lists the first entries of the anime index
 * (sorted by date, descending) for the selected dub language and loads
 * each detail page for description, cover and, optionally, a trailer.
 */
class AnisearchBridge extends BridgeAbstract
{
    const MAINTAINER = 'Tone866';
    const NAME = 'Anisearch';
    const URI = 'https://www.anisearch.de';
    const CACHE_TIMEOUT = 1800; // 30min
    const DESCRIPTION = 'Feed for Anisearch';
    const PARAMETERS = [[
        'category' => [
            'name' => 'Dub',
            'type' => 'list',
            'values' => [
                'DE'
                    => 'https://www.anisearch.de/anime/index/page-1?char=all&synchro=de&sort=date&order=desc&view=4',
                'EN'
                    => 'https://www.anisearch.de/anime/index/page-1?char=all&synchro=en&sort=date&order=desc&view=4',
                'JP'
                    => 'https://www.anisearch.de/anime/index/page-1?char=all&synchro=ja&sort=date&order=desc&view=4'
            ]
        ],
        'trailers' => [
            'name' => 'Trailers',
            'type' => 'checkbox',
            'title' => 'Will include trailes',
            'defaultValue' => false
        ]
    ]];

    /**
     * Collect up to 10 entries (5 when trailers are requested) from the
     * selected index page.
     *
     * Each entry triggers a request for its detail page, plus one more for
     * the trailer page when trailers are enabled and a trailer link exists.
     * Missing page fragments (description, cover, trailer iframe) are
     * tolerated and simply left out of the item content.
     */
    public function collectData()
    {
        $baseurl = 'https://www.anisearch.de/';
        $trailers = $this->getInput('trailers');
        $limit = $trailers ? 5 : 10;

        $dom = getSimpleHTMLDOM($this->getInput('category'));

        foreach ($dom->find('li.btype0') as $key => $li) {
            if ($key >= $limit) {
                break;
            }

            $a = $li->find('a', 0);
            $title = $a ? $a->find('span.title', 0) : null;
            if (!$a || !$title) {
                // Not a regular index entry; nothing to link to.
                continue;
            }
            $url = $baseurl . $a->href;

            //get article
            $domarticle = getSimpleHTMLDOM($url);
            $content = $domarticle->find('div.details-text', 0);

            //get header-image
            $headerimage = $domarticle->find('img#details-cover', 0);

            if ($content) {
                foreach ($content->find('.hidden') as $element) {
                    $element->remove();
                }
            }

            //get trailer
            $ytlink = '';
            if ($trailers) {
                $trailerlink = $domarticle->find('section#trailers > div > div.swiper > ul.swiper-wrapper > li.swiper-slide > a', 0);
                if (isset($trailerlink)) {
                    $trailersite = getSimpleHTMLDOM($baseurl . $trailerlink->href);
                    $player = $trailersite->find('div#player > iframe', 0);
                    if ($player) {
                        // The embed URL is read from the iframe's data-xsrc attribute.
                        $trailer = $player->{'data-xsrc'};
                        $ytlink = <<<EOT
                            <br /><iframe width="560" height="315" src="$trailer" title="YouTube video player"
                            frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share"
                            referrerpolicy="strict-origin-when-cross-origin" allowfullscreen></iframe>
                            EOT;
                    }
                }
            }

            $this->items[] = [
                'title' => $title->plaintext,
                'uri' => $url,
                // null fragments concatenate as empty strings
                'content' => $headerimage . '<br />' . $content . $ytlink
            ];
        }
    }
}

View File

@@ -126,28 +126,36 @@ class AnnasArchiveBridge extends BridgeAbstract
return;
}
foreach ($list->find('.w-full > .mb-4 > div > a') as $element) {
$item = [];
$item['title'] = $element->find('h3', 0)->plaintext;
$item['author'] = $element->find('div.italic', 0)->plaintext;
$item['uri'] = $element->href;
$item['content'] = $element->plaintext;
$item['uid'] = $item['uri'];
$elements = $list->find('.w-full > .mb-4 > div');
foreach ($elements as $element) {
// stop added entries once partial match list starts
if (str_contains($element->innertext, 'partial match')) {
break;
}
if ($element = $element->find('a', 0)) {
$item = [];
$item['title'] = $element->find('h3', 0)->plaintext;
$item['author'] = $element->find('div.italic', 0)->plaintext;
$item['uri'] = $element->href;
$item['content'] = $element->plaintext;
$item['uid'] = $item['uri'];
if ($item_html = getSimpleHTMLDOMCached($item['uri'])) {
$item_html = defaultLinkTo($item_html, self::URI);
$item['content'] .= $item_html->find('main img', 0);
$item['content'] .= $item_html->find('main .mt-4', 0); // Summary
if ($links = $item_html->find('main ul.mb-4', -1)) {
foreach ($links->find('li > a.js-download-link') as $file) {
$item['enclosures'][] = $file->href;
$item_html = getSimpleHTMLDOMCached($item['uri'], 86400 * 20);
if ($item_html) {
$item_html = defaultLinkTo($item_html, self::URI);
$item['content'] .= $item_html->find('main img', 0);
$item['content'] .= $item_html->find('main .mt-4', 0); // Summary
foreach ($item_html->find('main ul.mb-4 > li > a.js-download-link') as $file) {
if (!str_contains($file->href, 'fast_download')) {
$item['enclosures'][] = $file->href;
}
}
// Remove bulk torrents from enclosures list
$item['enclosures'] = array_diff($item['enclosures'], [self::URI . 'datasets']);
}
}
$this->items[] = $item;
$this->items[] = $item;
}
}
}

View File

@@ -18,9 +18,45 @@ class AppleMusicBridge extends BridgeAbstract
'required' => true,
],
]];
const CACHE_TIMEOUT = 21600; // 6 hours
const CACHE_TIMEOUT = 60 * 60 * 6; // 6 hours
private $title;
/**
 * Build one feed item per entry of the lookup result whose wrapperType is
 * 'collection', and remember the artist name as the feed title.
 */
public function collectData()
{
$items = $this->getJson();
// The artist entry is part of the same result list as the collections.
$artist = $this->getArtist($items);
$this->title = $artist->artistName;
foreach ($items as $item) {
// Skip everything that is not a collection (e.g. the artist entry itself).
if ($item->wrapperType === 'collection') {
$copyright = $item->copyright ?? '';
// Larger artwork variants are derived by rewriting the size segment of the 100x100 URL.
$artworkUrl500 = str_replace('/100x100', '/500x500', $item->artworkUrl100);
$artworkUrl2000 = str_replace('/100x100', '/2000x2000', $item->artworkUrl100);
// Escaped because the name is placed inside the alt attribute below.
$escapedCollectionName = htmlspecialchars($item->collectionName);
$this->items[] = [
'title' => $item->collectionName,
'uri' => $item->collectionViewUrl,
'timestamp' => $item->releaseDate,
'enclosures' => $artworkUrl500,
'author' => $item->artistName,
'content' => "<figure>
<img srcset=\"$item->artworkUrl60 60w, $item->artworkUrl100 100w, $artworkUrl500 500w, $artworkUrl2000 2000w\"
sizes=\"100%\" src=\"$artworkUrl2000\"
alt=\"Cover of $escapedCollectionName\"
style=\"display: block; margin: 0 auto;\" />
<figcaption>
from <a href=\"$artist->artistLinkUrl\">$item->artistName</a><br />$copyright
</figcaption>
</figure>",
];
}
}
}
private function getJson()
{
# Limit the amount of releases to 50
if ($this->getInput('limit') > 50) {
@@ -29,31 +65,53 @@ class AppleMusicBridge extends BridgeAbstract
$limit = $this->getInput('limit');
}
$url = 'https://itunes.apple.com/lookup?id='
. $this->getInput('artist')
. '&entity=album&limit='
. $limit .
'&sort=recent';
$url = 'https://itunes.apple.com/lookup?id=' . $this->getInput('artist') . '&entity=album&limit=' . $limit . '&sort=recent';
$html = getSimpleHTMLDOM($url);
$json = json_decode($html);
$result = $json->results;
foreach ($json->results as $obj) {
if ($obj->wrapperType === 'collection') {
$copyright = $obj->copyright ?? '';
$this->items[] = [
'title' => $obj->artistName . ' - ' . $obj->collectionName,
'uri' => $obj->collectionViewUrl,
'timestamp' => $obj->releaseDate,
'enclosures' => $obj->artworkUrl100,
'content' => '<a href=' . $obj->collectionViewUrl
. '><img src="' . $obj->artworkUrl100 . '" /></a><br><br>'
. $obj->artistName . ' - ' . $obj->collectionName
. '<br>'
. $copyright,
];
}
if (!is_array($result) || count($result) == 0) {
returnServerError('There is no artist with id "' . $this->getInput('artist') . '".');
}
return $result;
}
/**
 * Pick the artist entry out of a lookup result list.
 *
 * @param array $json Result objects, each carrying a `wrapperType` property
 * @return mixed The single entry whose wrapperType is 'artist'; otherwise the
 *               bridge name returned by parent::getName()
 */
private function getArtist($json)
{
    // array_filter() preserves the original keys, so re-index before reading
    // position 0; otherwise the artist is only found when it is the first result.
    $artists = array_values(array_filter($json, function ($obj) {
        return $obj->wrapperType == 'artist';
    }));

    if (count($artists) === 1) {
        return $artists[0];
    }

    // NOTE(review): this fallback is a string, while collectData() reads
    // ->artistName / ->artistLinkUrl from the result; confirm the intended fallback.
    return parent::getName();
}
/**
 * Feed name: the stored title when one has been set, otherwise the
 * parent's name.
 */
public function getName()
{
    return $this->title ?? parent::getName();
}
/**
 * Feed icon: the artist page's og:image, rewritten to a 144x144 variant.
 *
 * Falls back to the parent icon when no artist id is configured or when the
 * artist page carries no og:image meta tag.
 */
public function getIcon()
{
    if (empty($this->getInput('artist'))) {
        return parent::getIcon();
    }

    // it isn't necessary to set the correct artist name into the url
    $url = 'https://music.apple.com/us/artist/jon-bellion/' . $this->getInput('artist');
    $html = getSimpleHTMLDOMCached($url);

    $meta = $html->find('meta[property="og:image"]', 0);
    if (!$meta) {
        // Without the meta tag there is nothing to derive an icon from.
        return parent::getIcon();
    }

    // Replace the "<w>x<h>cw" size segment of the image URL.
    return preg_replace('/\/\d*x\d*cw/i', '/144x144-999', $meta->content);
}
}

View File

@@ -37,35 +37,82 @@ class ArsTechnicaBridge extends FeedExpander
{
$item_html = getSimpleHTMLDOMCached($item['uri']);
$item_html = defaultLinkTo($item_html, self::URI);
$item['content'] = $item_html->find('.article-content', 0);
$pages = $item_html->find('nav.page-numbers > .numbers > a', -2);
if (null !== $pages) {
for ($i = 2; $i <= $pages->innertext; $i++) {
$page_url = $item['uri'] . '&page=' . $i;
$page_html = getSimpleHTMLDOMCached($page_url);
$page_html = defaultLinkTo($page_html, self::URI);
$item['content'] .= $page_html->find('.article-content', 0);
$content = '';
$header = $item_html->find('article header', 0);
$leading = $header->find('p[class*=leading]', 0);
if ($leading != null) {
$content .= '<p>' . $leading->innertext . '</p>';
}
$intro_image = $header->find('img.intro-image', 0);
if ($intro_image != null) {
$content .= '<figure>' . $intro_image;
$image_caption = $header->find('.caption .caption-content', 0);
if ($image_caption != null) {
$content .= '<figcaption>' . $image_caption->innertext . '</figcaption>';
}
$item['content'] = str_get_html($item['content']);
$content .= '</figure>';
}
foreach ($item_html->find('.post-content') as $content_tag) {
$content .= $content_tag->innertext;
}
$item['content'] = str_get_html($content);
$parsely = $item_html->find('[name="parsely-page"]', 0);
$parsely_json = json_decode(html_entity_decode($parsely->content), true);
$item['categories'] = $parsely_json['tags'];
// Some lightboxes are nested in figures. I'd guess that's a
// bug in the website
foreach ($item['content']->find('figure div div.ars-lightbox') as $weird_lightbox) {
$weird_lightbox->parent->parent->outertext = $weird_lightbox;
}
// It's easier to reconstruct the whole thing than remove
// duplicate reactive tags
foreach ($item['content']->find('.ars-lightbox') as $lightbox) {
$lightbox_content = '';
foreach ($lightbox->find('.ars-lightbox-item') as $lightbox_item) {
$img = $lightbox_item->find('img', 0);
if ($img != null) {
$lightbox_content .= '<figure>' . $img;
$caption = $lightbox_item->find('div.pswp-caption-content', 0);
if ($caption != null) {
$credit = $lightbox_item->find('div.ars-gallery-caption-credit', 0);
if ($credit != null) {
$credit->innertext = 'Credit: ' . $credit->innertext;
}
$lightbox_content .= '<figcaption>' . $caption->innertext . '</figcaption>';
}
$lightbox_content .= '</figure>';
}
}
$lightbox->innertext = $lightbox_content;
}
// remove various ars advertising
$item['content']->find('#social-left', 0)->remove();
foreach ($item['content']->find('.ars-component-buy-box') as $ad) {
foreach ($item['content']->find('.ars-interlude-container') as $ad) {
$ad->remove();
}
foreach ($item['content']->find('.ad_wrapper') as $ad) {
$ad->remove();
foreach ($item['content']->find('.toc-container') as $toc) {
$toc->remove();
}
foreach ($item['content']->find('.sidebar') as $ad) {
$ad->remove();
// Mostly YouTube videos
$iframes = $item['content']->find('iframe');
foreach ($iframes as $iframe) {
$iframe->outertext = '<a href="' . $iframe->src . '">' . $iframe->src . '</a>';
}
// This fixed padding around the former iframes and actual inline videos
foreach ($item['content']->find('div[style*=aspect-ratio]') as $styled) {
$styled->removeAttribute('style');
}
$item['content'] = backgroundToImg($item['content']);
$item['uid'] = explode('=', $item['uri'])[1];
$item['uid'] = strval($parsely_json['post_id']);
return $item;
}
}

View File

@@ -45,7 +45,6 @@ class AsahiShimbunAJWBridge extends BridgeAbstract
foreach ($html->find('#MainInner li a') as $element) {
if ($element->parent()->class == 'HeadlineTopImage-S') {
Debug::log('Skip Headline, it is repeated below');
continue;
}
$item = [];

View File

@@ -8,6 +8,7 @@ class BMDSystemhausBlogBridge extends BridgeAbstract
const URI = 'https://www.bmd.com';
const DONATION_URI = 'https://paypal.me/cntools';
const DESCRIPTION = 'BMD Systemhaus - We make business easy';
const BMD_FAV_ICON = 'https://www.bmd.com/favicon.ico';
const ITEMSTYLE = [
'ilcr' => '<table width="100%"><tr><td style="vertical-align: top;">{data_img}</td><td style="vertical-align: top;">{data_content}</td></tr></table>',
@@ -53,7 +54,7 @@ class BMDSystemhausBlogBridge extends BridgeAbstract
public function collectData()
{
// get website content
$html = getSimpleHTMLDOM($this->getURI()) or returnServerError('No contents received!');
$html = getSimpleHTMLDOM($this->getURI());
// Convert relative links in HTML into absolute links
$html = defaultLinkTo($html, self::URI);
@@ -148,32 +149,73 @@ class BMDSystemhausBlogBridge extends BridgeAbstract
return null;
}
if ($parsedUrl->getHost() != 'www.bmd.com') {
if (!in_array($parsedUrl->getHost(), ['www.bmd.com', 'bmd.com'])) {
return null;
}
$path = explode('/', $parsedUrl->getPath());
$lang = '';
if ($this->getURIbyCountry($path[1]) == '') {
return null;
// extract language from url
$path = explode('/', $parsedUrl->getPath());
if (count($path) > 1) {
$lang = $path[1];
// validate data
if ($this->getURIbyCountry($lang) == '') {
$lang = '';
}
}
// if no country available, find language by browser
if ($lang == '') {
$srvLanguages = explode(';', $_SERVER['HTTP_ACCEPT_LANGUAGE']);
if (count($srvLanguages) > 0) {
$languages = explode(',', $srvLanguages[0]);
if (count($languages) > 0) {
for ($i = 0; $i < count($languages); $i++) {
$langDetails = explode('-', $languages[$i]);
if (count($langDetails) > 1) {
$lang = $langDetails[1];
} else {
$lang = substr($srvLanguages[0], 0, 2);
}
// validate data
if ($this->getURIbyCountry($lang) == '') {
$lang = '';
}
if ($lang != '') {
break;
}
}
}
}
}
// if no URL found by language, use AT as default
if ($this->getURIbyCountry($lang) == '') {
$lang = 'at';
}
$params = [];
$params['country'] = $path[1];
$params['country'] = strtolower($lang);
return $params;
}
//-----------------------------------------------------
public function getURI()
{
$lURI = $this->getURIbyCountry($this->getInput('country'));
$country = $this->getInput('country') ?? '';
$lURI = $this->getURIbyCountry($country);
return $lURI != '' ? $lURI : parent::getURI();
}
//-----------------------------------------------------
public function getIcon()
{
return 'https://www.bmd.com/favicon.ico';
return self::BMD_FAV_ICON;
}
//-----------------------------------------------------
@@ -192,7 +234,7 @@ class BMDSystemhausBlogBridge extends BridgeAbstract
//-----------------------------------------------------
private function getURIbyCountry($country)
{
switch ($country) {
switch (strtolower($country)) {
case 'at':
return 'https://www.bmd.com/at/ueber-bmd/blog-ohne-filter.html';
case 'de':

View File

@@ -284,8 +284,7 @@ class BadDragonBridge extends BridgeAbstract
case 'Clearance':
$toyData = json_decode(getContents($this->inputToURL(true)));
$productList = json_decode(getContents(self::URI
. 'api/inventory-toy/product-list'));
$productList = json_decode(getContents(self::URI . 'api/inventory-toy/product-list'));
foreach ($toyData->toys as $toy) {
$item = [];

View File

@@ -111,12 +111,12 @@ class BandcampBridge extends BridgeAbstract
$url = self::URI . 'api/hub/1/dig_deeper';
$data = $this->buildRequestJson();
$header = [
'Content-Type: application/json',
'Content-Length: ' . strlen($data)
'Content-Type: application/json',
'Content-Length: ' . strlen($data),
];
$opts = [
CURLOPT_CUSTOMREQUEST => 'POST',
CURLOPT_POSTFIELDS => $data
CURLOPT_CUSTOMREQUEST => 'POST',
CURLOPT_POSTFIELDS => $data,
];
$content = getContents($url, $header, $opts);
@@ -314,7 +314,8 @@ class BandcampBridge extends BridgeAbstract
{
$url = self::URI . 'api/' . $endpoint . '?' . http_build_query($query_data);
// todo: 429 Too Many Requests happens a lot
$data = json_decode(getContents($url));
$response = getContents($url);
$data = json_decode($response);
return $data;
}

View File

@@ -37,7 +37,7 @@ class BlizzardNewsBridge extends XPathAbstract
const XPATH_EXPRESSION_ITEM = '/html/body/div/div[4]/div[2]/div[2]/div/div/section/ol/li/article';
const XPATH_EXPRESSION_ITEM_TITLE = './/div/div[2]/h2';
const XPATH_EXPRESSION_ITEM_CONTENT = './/div[@class="ArticleListItem-description"]/div[@class="h6"]';
const XPATH_EXPRESSION_ITEM_CONTENT = './/div[@class="ArticleListItem-description"]/div[@class="h6"]/text()';
const XPATH_EXPRESSION_ITEM_URI = './/a[@class="ArticleLink ArticleLink"]/@href';
const XPATH_EXPRESSION_ITEM_AUTHOR = '';
const XPATH_EXPRESSION_ITEM_TIMESTAMP = './/time[@class="ArticleListItem-footerTimestamp"]/@timestamp';
@@ -57,4 +57,11 @@ class BlizzardNewsBridge extends XPathAbstract
}
return 'https://news.blizzard.com/' . $locale;
}
public function getIcon()
{
return <<<icon
https://blznews.akamaized.net/images/favicon-cb34a003c6f2f637ee8f4f7b406f3b9b120b918c04cabec7f03a760e708977ea9689a1c638f4396def8dce7b202cd007eae91946cc3c4a578aa8b5694226cfc6.ico
icon;
}
}

230
bridges/BlueskyBridge.php Normal file
View File

@@ -0,0 +1,230 @@
<?php
class BlueskyBridge extends BridgeAbstract
{
const NAME = 'Bluesky';
const URI = 'https://bsky.app';
const DESCRIPTION = 'Fetches posts from Bluesky';
const MAINTAINER = 'Code modified from rsshub (TonyRL https://github.com/TonyRL) and expanded';
const PARAMETERS = [
[
'data_source' => [
'name' => 'Bluesky Data Source',
'type' => 'list',
'defaultValue' => 'Profile',
'values' => [
'Profile' => 'getAuthorFeed',
],
'title' => 'Select the type of data source to fetch from Bluesky.'
],
'handle' => [
'name' => 'User Handle',
'type' => 'text',
'required' => true,
'exampleValue' => 'jackdodo.bsky.social',
'title' => 'Handle found in URL'
],
'filter' => [
'name' => 'Filter',
'type' => 'list',
'defaultValue' => 'posts_and_author_threads',
'values' => [
'posts_and_author_threads' => 'posts_and_author_threads',
'posts_with_replies' => 'posts_with_replies',
'posts_no_replies' => 'posts_no_replies',
'posts_with_media' => 'posts_with_media',
],
'title' => 'Combinations of post/repost types to include in response.'
]
]
];
private $profile;
/**
 * Feed name: "<display name> (@<handle>) - Bluesky" once a profile has been
 * loaded, otherwise the parent's name.
 */
public function getName()
{
    if (isset($this->profile)) {
        $handle = $this->profile['handle'];
        // displayName is optional in the profile response; use the handle
        // instead of emitting an undefined-index warning and an empty name.
        $displayName = $this->profile['displayName'] ?? '';
        if ($displayName === '') {
            $displayName = $handle;
        }
        return sprintf('%s (@%s) - Bluesky', $displayName, $handle);
    }
    return parent::getName();
}
/**
 * Feed URI: the profile page of the loaded profile's handle, or the
 * parent's URI while no profile is loaded.
 */
public function getURI()
{
    if (!isset($this->profile)) {
        return parent::getURI();
    }

    $handle = $this->profile['handle'];
    return self::URI . '/profile/' . $handle;
}
/**
 * Feed icon: the profile avatar when a profile is loaded and has one,
 * otherwise the parent's icon.
 */
public function getIcon()
{
    // avatar is optional in the profile response, hence the isset() on the key itself
    if (isset($this->profile['avatar'])) {
        return $this->profile['avatar'];
    }
    return parent::getIcon();
}
/**
 * Feed description: the profile description when a profile is loaded and
 * has one, otherwise the parent's description.
 */
public function getDescription()
{
    // description is optional in the profile response, hence the isset() on the key itself
    if (isset($this->profile['description'])) {
        return $this->profile['description'];
    }
    return parent::getDescription();
}
/**
 * Render an external-link embed as HTML.
 *
 * YouTube links (youtube.com/watch?v=... or youtu.be/...) are rendered as an
 * embedded player; every other link is rendered with its description and,
 * when present, its thumbnail.
 *
 * @param array $external Embed data; reads 'uri', 'title', 'description' and 'thumb'
 * @param string $did DID used to build the thumbnail URL
 * @return string HTML fragment
 */
private function parseExternal($external, $did)
{
    $rawUri = $external['uri'] ?? '';

    // All values come from remote content: escape everything that ends up in the markup.
    $externalUri = htmlspecialchars($rawUri, ENT_QUOTES, 'UTF-8');
    $externalTitle = htmlspecialchars($external['title'] ?? '', ENT_QUOTES, 'UTF-8');
    $externalDescription = htmlspecialchars($external['description'] ?? '', ENT_QUOTES, 'UTF-8');
    $thumb = $external['thumb'] ?? null;

    $description = "<p>External Link: <a href=\"$externalUri\">$externalTitle</a></p>";

    $isYoutube = preg_match('/youtube\.com\/watch\?v=([^\&\?\/]+)/', $rawUri, $id)
        || preg_match('/youtu\.be\/([^\&\?\/]+)/', $rawUri, $id);

    if ($isYoutube) {
        $videoId = htmlspecialchars($id[1], ENT_QUOTES, 'UTF-8');
        $description .= "<iframe width=\"560\" height=\"315\" src=\"https://www.youtube.com/embed/$videoId\" frameborder=\"0\" allowfullscreen></iframe>";
        return $description;
    }

    $description .= "<p>$externalDescription</p>";

    $thumbRef = $thumb['ref']['$link'] ?? null;
    if ($thumbRef) {
        $thumbUrl = htmlspecialchars(
            'https://cdn.bsky.app/img/feed_thumbnail/plain/' . $did . '/' . $thumbRef . '@jpeg',
            ENT_QUOTES,
            'UTF-8'
        );
        $description .= "<p><a href=\"$externalUri\"><img src=\"$thumbUrl\" alt=\"External Thumbnail\" /></a></p>";
    }

    return $description;
}
/**
 * Convert plain post text to HTML: escape it, turn http(s) URLs into links
 * and convert line breaks to <br />.
 *
 * @param string $text Plain text
 * @return string HTML fragment
 */
private function textToDescription($text)
{
    $text = htmlspecialchars($text, ENT_QUOTES, 'UTF-8');

    // Linkify before inserting <br /> tags: nl2br() puts "<br />" directly
    // in front of each newline, so a URL at the end of a line would otherwise
    // swallow "<br" into the link.
    $text = preg_replace('/(https?:\/\/[^\s]+)/i', '<a href="$1">$1</a>', $text);

    return nl2br($text);
}
/**
 * Resolve the configured handle, load the profile and the author feed, and
 * turn every feed entry into an item.
 *
 * The item content is the post text followed by whatever the post embeds:
 * external link, video thumbnail, images, quoted post and quoted images.
 */
public function collectData()
{
    $handle = $this->getInput('handle');
    $filter = $this->getInput('filter') ?: 'posts_and_author_threads';

    $did = $this->resolveHandle($handle);
    $this->profile = $this->getProfile($did);
    $authorFeed = $this->getAuthorFeed($did, $filter);

    foreach ($authorFeed['feed'] as $post) {
        $record = $post['post']['record'];
        // Embed as stored in the record, and the embed attached to the post itself.
        $recordEmbed = $record['embed'] ?? [];
        $embedType = $recordEmbed['$type'] ?? null;
        $postEmbed = $post['post']['embed'] ?? [];

        $postUri = self::URI . '/profile/' . $post['post']['author']['handle']
            . '/post/' . explode('app.bsky.feed.post/', $post['post']['uri'])[1];

        $item = [];
        $item['uri'] = $postUri;
        $item['title'] = strtok($record['text'], "\n");
        $item['timestamp'] = strtotime($record['createdAt']);
        // displayName is optional in the profile response
        $item['author'] = $this->profile['displayName'] ?? $this->profile['handle'];

        $description = $this->textToDescription($record['text']);

        // Retrieve DID for constructing image URLs
        $authorDid = $post['post']['author']['did'];

        if ($embedType === 'app.bsky.embed.external') {
            $description .= $this->parseExternal($recordEmbed['external'], $authorDid);
        }

        if ($embedType === 'app.bsky.embed.video') {
            $thumbnail = $postEmbed['thumbnail'] ?? null;
            if ($thumbnail) {
                $description .= "<p><a href=\"$postUri\"><img src=\"$thumbnail\" alt=\"Video Thumbnail\" /></a></p>";
            }
        }

        if ($embedType === 'app.bsky.embed.recordWithMedia#view') {
            $thumbnail = $postEmbed['media']['thumbnail'] ?? null;
            $playlist = $postEmbed['media']['playlist'] ?? null;
            if ($thumbnail) {
                $description .= "<p><video controls poster=\"$thumbnail\">";
                $description .= "<source src=\"$playlist\" type=\"application/x-mpegURL\">";
                $description .= 'Video source not supported</video></p>';
            }
        }

        if (!empty($recordEmbed['images'])) {
            foreach ($recordEmbed['images'] as $image) {
                $linkRef = $image['image']['ref']['$link'];
                $thumbnailUrl = $this->resolveThumbnailUrl($authorDid, $linkRef);
                $fullsizeUrl = $this->resolveFullsizeUrl($authorDid, $linkRef);
                $description .= "<br /><br /><a href=\"$fullsizeUrl\"><img src=\"$thumbnailUrl\" alt=\"Image\"></a>";
            }
        }

        // Enhanced handling for quote posts with images
        if ($embedType === 'app.bsky.embed.record') {
            $quotedRecord = $recordEmbed['record'] ?? [];
            $quotedAuthor = $postEmbed['record']['author']['handle'] ?? null;
            $quotedDisplayName = $postEmbed['record']['author']['displayName'] ?? null;
            $quotedText = $postEmbed['record']['value']['text'] ?? null;

            // Reset for every post so that a link computed for an earlier
            // post can never be attached to this one.
            $quotedPostUri = null;
            if ($quotedAuthor && isset($quotedRecord['uri'])) {
                $parts = explode('/', $quotedRecord['uri']);
                $quotedPostId = end($parts);
                $quotedPostUri = self::URI . '/profile/' . $quotedAuthor . '/post/' . $quotedPostId;
            }

            if ($quotedText) {
                $description .= '<hr /><strong>Quote from ' . htmlspecialchars($quotedDisplayName ?? '')
                    . ' (@ ' . htmlspecialchars($quotedAuthor ?? '') . '):</strong><br />';
                $description .= $this->textToDescription($quotedText);
                if ($quotedPostUri !== null) {
                    $description .= "<p><a href=\"$quotedPostUri\">View original quote post</a></p>";
                }
            }
        }

        if (isset($postEmbed['record']['value']['embed']['images'])) {
            $quotedImages = $postEmbed['record']['value']['embed']['images'];
            // Same for every quoted image, so look it up once.
            $quotedAuthorDid = $postEmbed['record']['author']['did'] ?? null;
            foreach ($quotedImages as $image) {
                $linkRef = $image['image']['ref']['$link'] ?? null;
                if ($linkRef) {
                    $thumbnailUrl = $this->resolveThumbnailUrl($quotedAuthorDid, $linkRef);
                    $fullsizeUrl = $this->resolveFullsizeUrl($quotedAuthorDid, $linkRef);
                    $description .= "<br /><br /><a href=\"$fullsizeUrl\"><img src=\"$thumbnailUrl\" alt=\"Quoted Image\"></a>";
                }
            }
        }

        $item['content'] = $description;
        $this->items[] = $item;
    }
}
/**
 * Look up the DID that belongs to a Bluesky handle.
 *
 * Calls the public com.atproto.identity.resolveHandle endpoint and returns
 * the "did" entry of the decoded JSON response.
 *
 * @param string $handle Handle to resolve; URL-encoded before being sent.
 * @return mixed Value stored under the "did" key of the response.
 */
private function resolveHandle($handle)
{
    $endpoint = 'https://public.api.bsky.app/xrpc/com.atproto.identity.resolveHandle?handle=';
    $decoded = json_decode(getContents($endpoint . urlencode($handle)), true);

    return $decoded['did'];
}
/**
 * Fetch the profile record of an actor.
 *
 * @param string $did Actor identifier passed to app.bsky.actor.getProfile; URL-encoded before use.
 * @return mixed Decoded JSON response (associative arrays), as returned by json_decode().
 */
private function getProfile($did)
{
    $endpoint = 'https://public.api.bsky.app/xrpc/app.bsky.actor.getProfile?actor=';

    return json_decode(getContents($endpoint . urlencode($did)), true);
}
/**
 * Fetch up to 30 feed entries of an actor.
 *
 * @param string $did    Actor identifier; URL-encoded before use.
 * @param string $filter Value for the endpoint's "filter" query parameter; URL-encoded before use.
 * @return mixed Decoded JSON response (associative arrays), as returned by json_decode().
 */
private function getAuthorFeed($did, $filter)
{
    $endpoint = sprintf(
        'https://public.api.bsky.app/xrpc/app.bsky.feed.getAuthorFeed?actor=%s&filter=%s&limit=30',
        urlencode($did),
        urlencode($filter)
    );

    return json_decode(getContents($endpoint), true);
}
/**
 * Build the CDN URL of the thumbnail rendition of an image blob.
 *
 * @param string $authorDid DID placed in the URL path before the blob reference.
 * @param string $linkRef   Blob reference placed in the URL path after the DID.
 * @return string URL under cdn.bsky.app/img/feed_thumbnail, suffixed with "@jpeg".
 */
private function resolveThumbnailUrl($authorDid, $linkRef)
{
    return "https://cdn.bsky.app/img/feed_thumbnail/plain/{$authorDid}/{$linkRef}@jpeg";
}
/**
 * Build the CDN URL of the full-size rendition of an image blob.
 *
 * @param string $authorDid DID placed in the URL path before the blob reference.
 * @param string $linkRef   Blob reference placed in the URL path after the DID.
 * @return string URL under cdn.bsky.app/img/feed_fullsize, suffixed with "@jpeg".
 */
private function resolveFullsizeUrl($authorDid, $linkRef)
{
    return "https://cdn.bsky.app/img/feed_fullsize/plain/{$authorDid}/{$linkRef}@jpeg";
}
}

218
bridges/BodaccBridge.php Normal file
View File

@@ -0,0 +1,218 @@
<?php
/**
 * Bridge listing the latest commercial announcements published on the
 * BODACC open-data portal, optionally filtered by department, announcement
 * family and announcement type.
 */
class BodaccBridge extends BridgeAbstract
{
    const NAME = 'BODACC';
    const URI = 'https://bodacc-datadila.opendatasoft.com/';
    const DESCRIPTION = 'Fetches announces from the French Government "Bulletin Officiel Des Annonces Civiles et Commerciales".';
    const CACHE_TIMEOUT = 86400;
    const MAINTAINER = 'quent1';
    const PARAMETERS = [
        'Annonces commerciales' => [
            'departement' => [
                'name' => 'Département',
                'type' => 'list',
                'values' => [
                    'Tous' => null,
                    'Ain' => '01',
                    'Aisne' => '02',
                    'Allier' => '03',
                    'Alpes-de-Haute-Provence' => '04',
                    'Hautes-Alpes' => '05',
                    'Alpes-Maritimes' => '06',
                    'Ardèche' => '07',
                    'Ardennes' => '08',
                    'Ariège' => '09',
                    'Aube' => '10',
                    'Aude' => '11',
                    'Aveyron' => '12',
                    'Bouches-du-Rhône' => '13',
                    'Calvados' => '14',
                    'Cantal' => '15',
                    'Charente' => '16',
                    'Charente-Maritime' => '17',
                    'Cher' => '18',
                    'Corrèze' => '19',
                    'Corse-du-Sud' => '2A',
                    'Haute-Corse' => '2B',
                    'Côte-d\'Or' => '21',
                    'Côtes-d\'Armor' => '22',
                    'Creuse' => '23',
                    'Dordogne' => '24',
                    'Doubs' => '25',
                    'Drôme' => '26',
                    'Eure' => '27',
                    'Eure-et-Loir' => '28',
                    'Finistère' => '29',
                    'Gard' => '30',
                    'Haute-Garonne' => '31',
                    'Gers' => '32',
                    'Gironde' => '33',
                    'Hérault' => '34',
                    'Ille-et-Vilaine' => '35',
                    'Indre' => '36',
                    'Indre-et-Loire' => '37',
                    'Isère' => '38',
                    'Jura' => '39',
                    'Landes' => '40',
                    'Loir-et-Cher' => '41',
                    'Loire' => '42',
                    'Haute-Loire' => '43',
                    'Loire-Atlantique' => '44',
                    'Loiret' => '45',
                    'Lot' => '46',
                    'Lot-et-Garonne' => '47',
                    'Lozère' => '48',
                    'Maine-et-Loire' => '49',
                    'Manche' => '50',
                    'Marne' => '51',
                    'Haute-Marne' => '52',
                    'Mayenne' => '53',
                    'Meurthe-et-Moselle' => '54',
                    'Meuse' => '55',
                    'Morbihan' => '56',
                    'Moselle' => '57',
                    'Nièvre' => '58',
                    'Nord' => '59',
                    'Oise' => '60',
                    'Orne' => '61',
                    'Pas-de-Calais' => '62',
                    'Puy-de-Dôme' => '63',
                    'Pyrénées-Atlantiques' => '64',
                    'Hautes-Pyrénées' => '65',
                    'Pyrénées-Orientales' => '66',
                    'Bas-Rhin' => '67',
                    'Haut-Rhin' => '68',
                    'Rhône' => '69',
                    'Haute-Saône' => '70',
                    'Saône-et-Loire' => '71',
                    'Sarthe' => '72',
                    'Savoie' => '73',
                    'Haute-Savoie' => '74',
                    'Paris' => '75',
                    'Seine-Maritime' => '76',
                    'Seine-et-Marne' => '77',
                    'Yvelines' => '78',
                    'Deux-Sèvres' => '79',
                    'Somme' => '80',
                    'Tarn' => '81',
                    'Tarn-et-Garonne' => '82',
                    'Var' => '83',
                    'Vaucluse' => '84',
                    'Vendée' => '85',
                    'Vienne' => '86',
                    'Haute-Vienne' => '87',
                    'Vosges' => '88',
                    'Yonne' => '89',
                    'Territoire de Belfort' => '90',
                    'Essonne' => '91',
                    'Hauts-de-Seine' => '92',
                    'Seine-Saint-Denis' => '93',
                    'Val-de-Marne' => '94',
                    'Val-d\'Oise' => '95',
                    'Guadeloupe' => '971',
                    'Martinique' => '972',
                    'Guyane' => '973',
                    'La Réunion' => '974',
                    'Saint-Pierre-et-Miquelon' => '975',
                    'Mayotte' => '976',
                    'Saint-Barthélemy' => '977',
                    'Saint-Martin' => '978',
                    'Terres australes et antarctiques françaises' => '984',
                    'Wallis-et-Futuna' => '986',
                    'Polynésie française' => '987',
                    'Nouvelle-Calédonie' => '988',
                    'Île de Clipperton' => '989'
                ]
            ],
            'famille' => [
                'name' => 'Famille',
                'type' => 'list',
                'values' => [
                    'Toutes' => null,
                    'Annonces diverses' => 'divers',
                    'Créations' => 'creation',
                    'Dépôts des comptes' => 'dpc',
                    'Immatriculations' => 'immatriculation',
                    'Modifications diverses' => 'modification',
                    'Procédures collectives' => 'collective',
                    'Procédures de conciliation' => 'conciliation',
                    'Procédures de rétablissement professionnel' => 'retablissement_professionnel',
                    'Radiations' => 'radiation',
                    'Ventes et cessions' => 'vente'
                ]
            ],
            'type' => [
                'name' => 'Type',
                'type' => 'list',
                'values' => [
                    'Tous' => null,
                    'Avis initial' => 'annonce',
                    'Avis d\'annulation' => 'annulation',
                    'Avis rectificatif' => 'rectificatif'
                ]
            ]
        ]
    ];

    /**
     * Query the "annonces-commerciales" dataset and turn each complete
     * record into a feed item (uid, timestamp, title).
     *
     * The 50 records with the highest ids are requested; every non-empty
     * bridge input adds one equality condition to the "where" clause.
     * Records missing any of the selected fields are skipped.
     */
    public function collectData()
    {
        // Dataset field => name of the bridge input that filters on it.
        $filterInputs = [
            'numerodepartement' => 'departement',
            'familleavis' => 'famille',
            'typeavis' => 'type',
        ];

        $conditions = [];
        foreach ($filterInputs as $field => $inputName) {
            $selected = $this->getInput($inputName);
            if (!empty($selected)) {
                $conditions[] = $field . '="' . $selected . '"';
            }
        }

        $query = [
            'select' => 'id,dateparution,typeavis_lib,familleavis_lib,commercant,ville,cp',
            'order_by' => 'id desc',
            'limit' => 50,
        ];
        if ($conditions !== []) {
            $query['where'] = implode(' and ', $conditions);
        }

        $endpoint = '/api/explore/v2.1/catalog/datasets/annonces-commerciales/records?' . http_build_query($query);
        $response = Json::decode(getContents(urljoin(self::URI, $endpoint)), false);

        foreach ($response->results as $record) {
            $isComplete = isset(
                $record->id,
                $record->dateparution,
                $record->typeavis_lib,
                $record->familleavis_lib,
                $record->commercant,
                $record->ville,
                $record->cp
            );
            if (!$isComplete) {
                continue;
            }

            $this->items[] = [
                'uid' => $record->id,
                'timestamp' => strtotime($record->dateparution),
                'title' => sprintf(
                    '[%s] %s - %s à %s (%s)',
                    $record->typeavis_lib,
                    $record->familleavis_lib,
                    $record->commercant,
                    $record->ville,
                    $record->cp
                ),
            ];
        }
    }
}

View File

@@ -1218,14 +1218,15 @@ EOT;
$table = $this->generateEventDetailsTable($event);
$imgsrc = $event['BannerURL'];
$FShareURL = $event['FShareURL'];
return <<<EOT
<img title="Event Banner URL" src="$imgsrc"></img>
<br>
$table
<br>
More Details are available on the <a href="${event['FShareURL']}">BookMyShow website</a>.
EOT;
<img title="Event Banner URL" src="$imgsrc">
<br>
$table
<br>
More Details are available on the <a href="$FShareURL">BookMyShow website</a>.
EOT;
}
/**
@@ -1292,14 +1293,15 @@ EOT;
$synopsis = preg_replace(self::SYNOPSIS_REGEX, '', $data['EventSynopsis']);
$eventTrailerURL = $data['EventTrailerURL'];
return <<<EOT
<img title="Movie Poster" src="$imgsrc"></img>
<div>$table</div>
<p>$innerHtml</p>
<p>${synopsis}</p>
More Details are available on the <a href="$url">BookMyShow website</a> and a trailer is available
<a href="${data['EventTrailerURL']}" title="Trailer URL">here</a>
EOT;
<img title="Movie Poster" src="$imgsrc"></img>
<div>$table</div>
<p>$innerHtml</p>
<p>$synopsis</p>
More Details are available on the <a href="$url">BookMyShow website</a> and a trailer is available
<a href="$eventTrailerURL" title="Trailer URL">here</a>
EOT;
}
/**

View File

@@ -164,7 +164,7 @@ class BugzillaBridge extends BridgeAbstract
}
$cache = $this->loadCacheValue($this->instance . $user);
if (!is_null($cache)) {
if ($cache) {
return $cache;
}

View File

@@ -56,7 +56,7 @@ class CNETBridge extends SitemapBridge
foreach ($links as $article_uri) {
$article_dom = convertLazyLoading(getSimpleHTMLDOMCached($article_uri));
$title = trim($article_dom->find('h1', 0)->plaintext);
$author = $article_dom->find('span.c-assetAuthor_name', 0)->plaintext;
$author = $article_dom->find('span.c-assetAuthor_name', 0);
$headline = $article_dom->find('p.c-contentHeader_description', 0);
$content = $article_dom->find('div.c-pageArticle_content, div.single-article__content, div.article-main-body', 0);
$date = null;
@@ -97,7 +97,11 @@ class CNETBridge extends SitemapBridge
$item = [];
$item['uri'] = $article_uri;
$item['title'] = $title;
$item['author'] = $author;
if ($author) {
$item['author'] = $author->plaintext;
}
$item['content'] = $content;
if (!is_null($date)) {

View File

@@ -42,45 +42,23 @@ class CVEDetailsBridge extends BridgeAbstract
$this->fetchContent();
}
foreach ($this->html->find('#searchresults > .row') as $i => $tr) {
// There are some optional vulnerability types, which will be
// added to the categories as well as the CWE number -- which is
// always given.
$categories = [$this->vendor];
$enclosures = [];
$detailLink = $tr->find('h3 > a', 0);
$detailHtml = getSimpleHTMLDOM($detailLink->href);
// The CVE number itself
$var = $this->html->find('#searchresults > div > div.row');
foreach ($var as $i => $tr) {
$uri = $tr->find('h3 > a', 0)->href ?? null;
$title = $tr->find('h3 > a', 0)->innertext;
$content = $tr->find('.cvesummarylong', 0)->innertext;
$cweList = $detailHtml->find('h2', 2)->next_sibling();
foreach ($cweList->find('li') as $li) {
$cweWithDescription = $li->find('a', 0)->innertext ?? '';
if (preg_match('/CWE-(\d+)/', $cweWithDescription, $cwe)) {
$categories[] = 'CWE-' . $cwe[1];
$enclosures[] = 'https://cwe.mitre.org/data/definitions/' . $cwe[1] . '.html';
}
}
if ($this->product != '') {
$categories[] = $this->product;
}
$content = $tr->find('.cvesummarylong', 0)->innertext ?? '';
$timestamp = $tr->find('[data-tsvfield="publishDate"]', 0)->innertext ?? 0;
$this->items[] = [
'uri' => 'https://cvedetails.com/' . $detailHtml->find('h1 > a', 0)->href,
'uri' => $uri,
'title' => $title,
'timestamp' => $tr->find('[data-tsvfield="publishDate"]', 0)->innertext,
'timestamp' => $timestamp,
'content' => $content,
'categories' => $categories,
'enclosures' => $enclosures,
'categories' => [$this->vendor],
'enclosures' => [],
'uid' => $title,
];
// We only want to fetch the latest 10 CVEs
if (count($this->items) >= 10) {
if (count($this->items) >= 30) {
break;
}
}

View File

@@ -6,46 +6,113 @@ class CarThrottleBridge extends BridgeAbstract
const URI = 'https://www.carthrottle.com/';
const DESCRIPTION = 'Get the latest car-related news from Car Throttle.';
const MAINTAINER = 't0stiman';
const DONATION_URI = 'https://ko-fi.com/tostiman';
const PARAMETERS = [
'Show articles from these categories:' => [
'news' => [
'name' => 'news',
'type' => 'checkbox'
],
'reviews' => [
'name' => 'reviews',
'type' => 'checkbox'
],
'features' => [
'name' => 'features',
'type' => 'checkbox'
],
'videos' => [
'name' => 'videos',
'type' => 'checkbox'
],
'gaming' => [
'name' => 'gaming',
'type' => 'checkbox'
]
]
];
public function collectData()
{
$news = getSimpleHTMLDOMCached(self::URI . 'news');
$this->items = [];
$this->items[] = [];
$this->handleCategory('news');
$this->handleCategory('reviews');
$this->handleCategory('features');
$this->handleCategory2('videos', 'video');
$this->handleCategory('gaming');
}
/**
 * Collect the articles of a category when the bridge input of the same
 * name is set.
 *
 * @param string $category Used both as input name and as the argument to getArticles().
 */
private function handleCategory($category)
{
    if (!$this->getInput($category)) {
        return;
    }

    $this->getArticles($category);
}
/**
 * Variant of handleCategory() for categories whose input name differs
 * from the name passed to getArticles().
 *
 * @param string $categoryParameter Name of the bridge input to test.
 * @param string $categoryURLname   Value passed to getArticles() when the input is set.
 */
private function handleCategory2($categoryParameter, $categoryURLname)
{
    if (!$this->getInput($categoryParameter)) {
        return;
    }

    $this->getArticles($categoryURLname);
}
private function getArticles($category)
{
$categoryPage = getSimpleHTMLDOMCached(self::URI . $category);
//for each post
foreach ($news->find('div.cmg-card') as $post) {
foreach ($categoryPage->find('div.cmg-card') as $post) {
$item = [];
$titleElement = $post->find('div.title a.cmg-link')[0];
$item['uri'] = self::URI . $titleElement->getAttribute('href');
$titleElement = $post->find('div.title a')[0];
$post_uri = self::URI . $titleElement->getAttribute('href');
if (!isset($post_uri) || $post_uri == '') {
continue;
}
$item['uri'] = $post_uri;
$item['title'] = $titleElement->innertext;
$articlePage = getSimpleHTMLDOMCached($item['uri']);
$authorDiv = $articlePage->find('div.author div');
if ($authorDiv) {
$item['author'] = $authorDiv[1]->innertext;
}
$item['author'] = $this->parseAuthor($articlePage);
$articleImage = $articlePage->find('div.block-layout-field-image')[0];
$article = $articlePage->find('div.block-layout-body')[1];
$dinges = $articlePage->find('div.main-body')[0] ?? null;
//remove ads
if ($dinges) {
foreach ($dinges->find('aside') as $ad) {
$ad->outertext = '';
$dinges->save();
}
foreach ($article->find('aside') as $ad) {
$ad->outertext = '';
}
$var = $articlePage->find('div.summary')[0] ?? '';
$var1 = $articlePage->find('figure.main-image')[0] ?? '';
$dinges1 = $dinges ?? '';
$summary = $articlePage->find('div.summary')[0];
$item['content'] = $var .
$var1 .
$dinges1;
//these are supposed to be hidden
foreach ($article->find('.visually-hidden') as $found) {
$found->outertext = '';
}
$item['content'] = $summary . $articleImage . $article;
array_push($this->items, $item);
}
}
/**
 * Extract the author name from an article page.
 *
 * Uses the first "div address" element: the text of its first link when
 * there is one, otherwise the element's own inner HTML.
 *
 * @param object $articlePage Parsed article document exposing find().
 * @return string Author name, or '' when the page has no "div address" element.
 */
private function parseAuthor($articlePage)
{
    $authorDivs = $articlePage->find('div address');
    if (!$authorDivs) {
        return '';
    }

    // Ask find() for index 0 directly: indexing the result array with [0]
    // raises an "undefined array key" warning when the address has no link.
    $link = $authorDivs[0]->find('a', 0);
    if ($link) {
        return $link->innertext;
    }

    return $authorDivs[0]->innertext;
}
}

View File

@@ -54,7 +54,7 @@ class CaschyBridge extends FeedExpander
{
// remove unwanted stuff
foreach (
$article->find('div.video-container, div.aawp, p.aawp-disclaimer, iframe.wp-embedded-content,
$article->find('div.aawp, p.aawp-disclaimer, iframe.wp-embedded-content,
div.wp-embed, p.wp-caption-text, script') as $element
) {
$element->remove();

View File

@@ -0,0 +1,279 @@
<?php
/**
 * Common bridge for the newspapers of the Centre France group.
 *
 * Headlines are read from a newspaper's front page (or a locality page) and
 * each article page is then fetched to fill in date, author, content,
 * enclosures and categories.
 */
class CentreFranceBridge extends BridgeAbstract
{
    const NAME = 'Centre France Newspapers';
    const URI = 'https://www.centrefrance.com/';
    const DESCRIPTION = 'Common bridge for all Centre France group newspapers.';
    const CACHE_TIMEOUT = 7200; // 2h
    const MAINTAINER = 'quent1';
    const PARAMETERS = [
        'global' => [
            'newspaper' => [
                'name' => 'Newspaper',
                'type' => 'list',
                'values' => [
                    'La Montagne' => 'lamontagne.fr',
                    'Le Populaire du Centre' => 'lepopulaire.fr',
                    'La République du Centre' => 'larep.fr',
                    'Le Berry Républicain' => 'leberry.fr',
                    'L\'Yonne Républicaine' => 'lyonne.fr',
                    'L\'Écho Républicain' => 'lechorepublicain.fr',
                    'Le Journal du Centre' => 'lejdc.fr',
                    'L\'Éveil de la Haute-Loire' => 'leveil.fr',
                    'Le Pays' => 'le-pays.fr'
                ]
            ],
            'remove-reserved-for-subscribers-articles' => [
                'name' => 'Remove reserved for subscribers articles',
                'type' => 'checkbox',
                'title' => 'Filter out articles that are only available to subscribers'
            ],
            'limit' => [
                'name' => 'Limit',
                'type' => 'number',
                'title' => 'How many articles to fetch. 0 to disable.',
                'required' => true,
                'defaultValue' => 15
            ]
        ],
        'Local news' => [
            'locality-slug' => [
                'name' => 'Locality slug',
                'type' => 'text',
                'required' => false,
                'title' => 'Fetch articles for a specific locality. If not set, headlines from the front page will be used instead.',
                'exampleValue' => 'moulins-03000'
            ],
        ]
    ];

    /**
     * Collect articles from the selected newspaper's listing page.
     *
     * Articles are detected through their ".c-titre" title blocks. Duplicate
     * links, entries of the "most shared" block and (when a locality is set)
     * links not containing the locality slug are ignored. Collection stops
     * once "limit" items were gathered; a limit of 0 disables the cap.
     */
    public function collectData()
    {
        $value = $this->getInput('limit');
        if (is_numeric($value) && (int)$value >= 0) {
            // Cast so the strict comparison against count() below can match.
            $limit = (int)$value;
        } else {
            $limit = static::PARAMETERS['global']['limit']['defaultValue'];
        }

        $newspaper = $this->getInput('newspaper');
        if (empty($newspaper)) {
            return;
        }

        $localitySlug = $this->getInput('locality-slug') ?? '';
        $alreadyFoundArticlesURIs = [];

        $newspaperBaseUrl = 'https://www.' . $newspaper . '/';
        // Append the locality segment only when one is set, so the front page
        // is requested as ".../" rather than "...//".
        $newspaperUrl = $localitySlug === '' ? $newspaperBaseUrl : $newspaperBaseUrl . $localitySlug . '/';
        $html = getSimpleHTMLDOM($newspaperUrl);

        // Articles are detected through their titles
        foreach ($html->find('.c-titre') as $articleTitleBlock) {
            $articleLinkDOMElement = $articleTitleBlock->find('a', 0);

            // A title block without a link cannot be turned into an item
            if (!$articleLinkDOMElement) {
                continue;
            }

            // Ignore articles in the « Les + partagés » block
            if (str_contains((string)$articleLinkDOMElement->id, 'les_plus_partages')) {
                continue;
            }

            $articleURI = $articleLinkDOMElement->href;

            // If the URI has already been processed, ignore it
            if (in_array($articleURI, $alreadyFoundArticlesURIs, true)) {
                continue;
            }

            // If news are filtered for a specific locality, filter out article for other localities
            if ($localitySlug !== '' && !str_contains($articleURI, $localitySlug)) {
                continue;
            }

            $articleTitle = '';

            // If article is reserved for subscribers
            if ($articleLinkDOMElement->find('span.premium-picto', 0)) {
                if ($this->getInput('remove-reserved-for-subscribers-articles') === true) {
                    continue;
                }

                $articleTitle .= '🔒 ';
            }

            $articleTitleSpan = $articleLinkDOMElement->find('span[data-tb-title]', 0);
            if ($articleTitleSpan === null) {
                continue;
            }

            if ($limit > 0 && count($this->items) === $limit) {
                break;
            }

            $articleTitle .= $articleTitleSpan->innertext;
            $articleFullURI = urljoin($newspaperBaseUrl, $articleURI);

            $item = [
                'title' => $articleTitle,
                'uri' => $articleFullURI,
                ...$this->collectArticleData($articleFullURI)
            ];
            $this->items[] = $item;

            $alreadyFoundArticlesURIs[] = $articleURI;
        }
    }

    /**
     * Fetch one article page (cached for 90 days) and extract its details.
     *
     * @param string $uri Absolute article URL.
     * @return array Item fields: always "enclosures" and "content"; "timestamp",
     *               "author", "categories" and "uid" when they could be determined.
     */
    private function collectArticleData($uri): array
    {
        $html = getSimpleHTMLDOMCached($uri, 86400 * 90); // 90d

        $item = [
            'enclosures' => [],
            // Always defined so the "grand format" fallback below can test it safely.
            'content' => '',
        ];

        $articleInformations = $html->find('.c-article-informations p');
        if (is_array($articleInformations) && $articleInformations !== []) {
            $authorPosition = 1;

            // Article publication date
            $publishedAt = $this->parseArticleDate($articleInformations[0]->innertext);
            if ($publishedAt !== null) {
                $item['timestamp'] = $publishedAt;
            }

            // Article update date: when present it replaces the publication date
            // and pushes the author paragraph one position further.
            if (count($articleInformations) >= 2) {
                $updatedAt = $this->parseArticleDate($articleInformations[1]->innertext);
                if ($updatedAt !== null) {
                    $authorPosition = 2;
                    $item['timestamp'] = $updatedAt;
                }
            }

            if (count($articleInformations) === ($authorPosition + 1)) {
                $item['author'] = $articleInformations[$authorPosition]->innertext;
            }
        }

        $articleContent = $html->find('.b-article .contenu > *');
        if (is_array($articleContent)) {
            foreach ($articleContent as $contentPart) {
                if (in_array($contentPart->getAttribute('id'), ['cf-audio-player', 'poool-widget'], true)) {
                    continue;
                }

                $articleHiddenParts = $contentPart->find('.bloc, .p402_hide');
                if (is_array($articleHiddenParts)) {
                    foreach ($articleHiddenParts as $articleHiddenPart) {
                        $contentPart->removeChild($articleHiddenPart);
                    }
                }

                $item['content'] .= $contentPart->innertext;
            }
        }

        $articleIllustration = $html->find('.photo-wrapper .photo-box img');
        if (is_array($articleIllustration) && count($articleIllustration) === 1) {
            $item['enclosures'][] = $articleIllustration[0]->getAttribute('src');
        }

        $articleAudio = $html->find('#cf-audio-player-container audio');
        if (is_array($articleAudio) && count($articleAudio) === 1) {
            $item['enclosures'][] = $articleAudio[0]->getAttribute('src');
        }

        $articleTags = $html->find('.b-article > ul.c-tags > li > a.t-simple');
        if (is_array($articleTags)) {
            $item['categories'] = array_map(static fn ($articleTag) => $articleTag->innertext, $articleTags);
        }

        // The uid is the part after the last "_" of the URL, when it is numeric
        $uriParts = explode('_', $uri);
        $uid = rtrim(end($uriParts), '/');
        if (is_numeric($uid)) {
            $item['uid'] = $uid;
        }

        // If the article is a "grand format", we use another parsing strategy
        if ($item['content'] === '' && $html->find('article') !== []) {
            foreach ($html->find('article > section') as $contentPart) {
                // find() without an index returns an array, so request the
                // first match explicitly before reading its innertext.
                $journalist = $contentPart->find('#journo', 0);
                if ($journalist) {
                    $item['author'] = $journalist->innertext;
                    continue;
                }

                $item['content'] .= $contentPart->innertext;
            }
        }

        $item['content'] = str_replace('<span class="p-premium">premium</span>', '🔒', $item['content']);
        $item['content'] = trim($item['content']);

        return $item;
    }

    /**
     * Parse a "dd/mm/yyyy" date, optionally followed by " à HHhMM", found in a text.
     *
     * @param mixed $text Text to search; cast to string.
     * @return int|null Timestamp (midnight when no time is given), or null when no date is found.
     */
    private function parseArticleDate($text): ?int
    {
        if (preg_match('/(\d{2})\/(\d{2})\/(\d{4})( à (\d{2})h(\d{2}))?/', (string)$text, $dateParts) !== 1) {
            return null;
        }

        $date = new \DateTime('midnight');
        $date->setDate((int)$dateParts[3], (int)$dateParts[2], (int)$dateParts[1]);

        // Seven entries means the optional time group matched as well
        if (count($dateParts) === 7) {
            $date->setTime((int)$dateParts[5], (int)$dateParts[6]);
        }

        return $date->getTimestamp();
    }

    /**
     * Feed name: the newspaper's display name, followed by the locality name
     * (slug without its trailing segment) when a locality is set.
     */
    public function getName()
    {
        if (empty($this->getInput('newspaper'))) {
            return static::NAME;
        }

        $newspaperNameByDomain = array_flip(self::PARAMETERS['global']['newspaper']['values']);
        if (!isset($newspaperNameByDomain[$this->getInput('newspaper')])) {
            return static::NAME;
        }

        $completeTitle = $newspaperNameByDomain[$this->getInput('newspaper')];

        if (!empty($this->getInput('locality-slug'))) {
            $localityName = explode('-', $this->getInput('locality-slug'));
            array_pop($localityName);
            $completeTitle .= ' ' . ucfirst(implode('-', $localityName));
        }

        return $completeTitle;
    }

    /**
     * Favicon of the selected newspaper, or of the group site when none is selected.
     */
    public function getIcon()
    {
        if (empty($this->getInput('newspaper'))) {
            // URI already ends with "/"; trim it to avoid "//favicon.ico".
            return rtrim(static::URI, '/') . '/favicon.ico';
        }

        return 'https://www.' . $this->getInput('newspaper') . '/favicon.ico';
    }

    /**
     * Derive bridge parameters from a newspaper URL.
     *
     * @param string $url URL of a newspaper front page or locality page.
     * @return array|null "newspaper" and "locality-slug" values, or null when the
     *                    URL does not match a known newspaper domain.
     */
    public function detectParameters($url)
    {
        $regex = '/^(https?:\/\/)?(www\.)?([a-z-]+\.fr)(\/)?([a-z-]+-[0-9]{5})?(\/)?$/';
        $url = strtolower($url);

        if (preg_match($regex, $url, $urlMatches) === 0) {
            return null;
        }

        if (!in_array($urlMatches[3], self::PARAMETERS['global']['newspaper']['values'], true)) {
            return null;
        }

        return [
            'newspaper' => $urlMatches[3],
            'locality-slug' => empty($urlMatches[5]) ? null : $urlMatches[5]
        ];
    }
}

View File

@@ -60,7 +60,7 @@ class CeskaTelevizeBridge extends BridgeAbstract
foreach ($html->find('#episodeListSection a[data-testid=card]') as $element) {
$itemTitle = $element->find('h3', 0);
$itemContent = $element->find('p[class^=content-]', 0);
$itemDate = $element->find('div[class^=playTime-] span', 0);
$itemDate = $element->find('div[class^=playTime-] span, [data-testid=episode-item-broadcast] span', 0);
$itemThumbnail = $element->find('img', 0);
$itemUri = self::URI . $element->getAttribute('href');

View File

@@ -2,59 +2,65 @@
class ComicsKingdomBridge extends BridgeAbstract
{
const MAINTAINER = 'stjohnjohnson';
const MAINTAINER = 'TReKiE';
// const MAINTAINER = 'stjohnjohnson';
const NAME = 'Comics Kingdom Unofficial RSS';
const URI = 'https://comicskingdom.com/';
const URI = 'https://wp.comicskingdom.com/wp-json/wp/v2/ck_comic';
const CACHE_TIMEOUT = 21600; // 6h
const DESCRIPTION = 'Comics Kingdom Unofficial RSS';
const PARAMETERS = [ [
'comicname' => [
'name' => 'comicname',
'name' => 'Name of comic',
'type' => 'text',
'exampleValue' => 'mutts',
'title' => 'The name of the comic in the URL after https://comicskingdom.com/',
'required' => true
],
'limit' => [
'name' => 'Limit',
'type' => 'number',
'title' => 'The number of recent comics to get',
'defaultValue' => 10
]
]];
protected $comicName;
public function collectData()
{
$html = getSimpleHTMLDOM($this->getURI(), [], [], true, false);
$json = getContents($this->getURI());
$data = json_decode($json, false);
// Get author from first page
$author = $html->find('div.author p', 0);
;
if (isset($data[0]->_embedded->{'wp:term'}[0][0])) {
$this->comicName = $data[0]->_embedded->{'wp:term'}[0][0]->name;
}
// Get current date/link
$link = $html->find('meta[property=og:url]', -1)->content;
for ($i = 0; $i < 3; $i++) {
foreach ($data as $comicitem) {
$item = [];
$page = getSimpleHTMLDOM($link);
$imagelink = $page->find('meta[property=og:image]', 0)->content;
$date = explode('/', $link);
$item['id'] = $imagelink;
$item['uri'] = $link;
$item['author'] = $author;
$item['title'] = 'Comics Kingdom ' . $this->getInput('comicname');
$item['timestamp'] = DateTime::createFromFormat('Y-m-d', $date[count($date) - 1])->getTimestamp();
$item['content'] = '<img src="' . $imagelink . '" />';
$item['id'] = $comicitem->id;
$item['uri'] = $comicitem->yoast_head_json->og_url;
$item['author'] = str_ireplace('By ', '', $comicitem->ck_comic_byline);
$item['title'] = $comicitem->yoast_head_json->title;
$item['timestamp'] = $comicitem->date;
$item['content'] = '<img src="' . $comicitem->yoast_head_json->og_image[0]->url . '" />';
$this->items[] = $item;
$link = $page->find('div.comic-viewer-inline a', 0)->href;
if (empty($link)) {
break; // allow bridge to continue if there's less than 3 comics
}
}
}
public function getURI()
{
if (!is_null($this->getInput('comicname'))) {
return self::URI . urlencode($this->getInput('comicname'));
$params = [
'ck_feature' => $this->getInput('comicname'),
'per_page' => $this->getInput('limit'),
'date_inclusive' => 'true',
'order' => 'desc',
'page' => '1',
'_embed' => 'true'
];
return self::URI . '?' . http_build_query($params);
}
return parent::getURI();
@@ -62,8 +68,8 @@ class ComicsKingdomBridge extends BridgeAbstract
public function getName()
{
if (!is_null($this->getInput('comicname'))) {
return $this->getInput('comicname') . ' - Comics Kingdom';
if ($this->comicName) {
return $this->comicName . ' - Comics Kingdom';
}
return parent::getName();

View File

@@ -442,7 +442,7 @@ class CssSelectorComplexBridge extends BridgeAbstract
if (!is_null($time_selector) && $time_selector != '') {
$time_element = $entry_html->find($time_selector, 0);
$time = $time_element->getAttribute('datetime');
if (is_null($time)) {
if (empty($time)) {
$time = $time_element->innertext;
}

View File

@@ -47,8 +47,10 @@ class CubariBridge extends BridgeAbstract
*/
public function collectData()
{
// TODO: fix trivial SSRF
$json = getContents($this->getInput('gist'));
$jsonFile = json_decode($json, true);
$jsonFile = Json::decode($json);
$this->mangaTitle = $jsonFile['title'];

View File

@@ -0,0 +1,129 @@
<?php
/**
 * Bridge returning the chapters of a series served through the Cubari
 * proxy API, optionally with the chapter page images.
 */
class CubariProxyBridge extends BridgeAbstract
{
    const NAME = 'Cubari Proxy';
    const MAINTAINER = 'phantop';
    const URI = 'https://cubari.moe';
    const DESCRIPTION = 'Returns chapters from Cubari.';
    const PARAMETERS = [[
        'service' => [
            'name' => 'Content service',
            'type' => 'list',
            'defaultValue' => 'mangadex',
            'values' => [
                'MangAventure' => 'mangadventure',
                'MangaDex' => 'mangadex',
                'MangaKatana' => 'mangakatana',
                'MangaSee' => 'mangasee',
            ]
        ],
        'series' => [
            'name' => 'Series ID/Name',
            'exampleValue' => '8c1d7d0c-e0b7-4170-941d-29f652c3c19d', # KnH
            'required' => true,
        ],
        'fetch' => [
            'name' => 'Fetch chapter page images',
            'type' => 'list',
            'title' => 'Places chapter images in feed contents. Entries will consume more bandwidth.',
            'defaultValue' => 'c',
            'values' => [
                'None' => 'n',
                'Content' => 'c',
                'Enclosure' => 'e'
            ]
        ],
        'limit' => self::LIMIT
    ]];

    private $title;

    /**
     * Load the series description and emit one item per chapter, highest
     * chapter key first, up to "limit" items (10 when no limit is given).
     *
     * Unless "fetch" is "n", the page list of each emitted chapter is
     * requested as well and placed in the item content ("c") or in its
     * enclosures ("e").
     */
    public function collectData()
    {
        $limit = (int)($this->getInput('limit') ?? 10);
        $fetchMode = $this->getInput('fetch');

        $url = parent::getURI() . '/read/api/' . $this->getInput('service') . '/series/' . $this->getInput('series');
        $json = Json::decode(getContents($url));

        $this->title = $json['title'];

        $chapters = $json['chapters'];
        krsort($chapters);

        $count = 0;
        foreach ($chapters as $number => $element) {
            // Check the limit before any per-chapter work, so no page list is
            // requested for a chapter that would be discarded anyway.
            if ($count === $limit) {
                break;
            }
            $count++;

            $item = [];
            $item['uri'] = $this->getURI() . '/' . $number;

            if (!empty($element['title'])) {
                $item['title'] = $number . ' - ' . $element['title'];
            } else {
                $item['title'] = 'Volume ' . ($element['volume'] ?? '') . ' Chapter ' . $number;
            }

            // Use the group with the most recent release date when dates are
            // available, otherwise the group keyed '1'.
            $group = '1';
            if (isset($element['release_date'])) {
                $dates = $element['release_date'];
                $date = max($dates);
                $item['timestamp'] = $date;
                $group = array_keys($dates, $date)[0];
            }
            $page = $element['groups'][$group];
            $item['author'] = $json['groups'][$group];
            $api = parent::getURI() . $page;
            $item['uid'] = $page;
            $item['comments'] = $api;

            if ($fetchMode !== 'n') {
                $pages = [];
                try {
                    $jsonp = getContents($api);
                    $pages = Json::decode($jsonp);
                } catch (HttpException $e) {
                    // allow error 500, as it's effectively a 429
                    if ($e->getCode() !== 500) {
                        throw $e;
                    }
                }
                if ($fetchMode === 'e') {
                    $item['enclosures'] = $pages;
                }
                if ($fetchMode === 'c') {
                    $item['content'] = '';
                    foreach ($pages as $img) {
                        $item['content'] .= '<img src="' . $img . '"/>';
                    }
                }
            }

            $this->items[] = $item;
        }
    }

    /**
     * Bridge name, suffixed with the series title once it has been loaded.
     */
    public function getName()
    {
        $name = parent::getName();
        if (isset($this->title)) {
            $name .= ' - ' . $this->title;
        }
        return $name;
    }

    /**
     * Reader URL of the configured series, or the base URI when no service is set.
     */
    public function getURI()
    {
        $uri = parent::getURI();
        if ($this->getInput('service')) {
            $uri .= '/read/' . $this->getInput('service') . '/' . $this->getInput('series');
        }
        return $uri;
    }

    /**
     * Static favicon served under the base URI.
     */
    public function getIcon()
    {
        return parent::getURI() . '/static/favicon.png';
    }
}

View File

@@ -0,0 +1,107 @@
<?php
/**
 * Expands the appointment RSS feed of drk-blutspende.de with details
 * (date, address, extra information, photos) read from each appointment page.
 */
class DRKBlutspendeBridge extends FeedExpander
{
    const MAINTAINER = 'User123698745';
    const NAME = 'DRK-Blutspende';
    const BASE_URI = 'https://www.drk-blutspende.de';
    const URI = self::BASE_URI;
    const CACHE_TIMEOUT = 60 * 60 * 1; // 1 hour
    const DESCRIPTION = 'German Red Cross (Deutsches Rotes Kreuz) blood donation service feed with more details';
    const CONTEXT_APPOINTMENTS = 'Termine';
    const PARAMETERS = [
        self::CONTEXT_APPOINTMENTS => [
            'term' => [
                'name' => 'PLZ / Ort',
                'required' => true,
                'exampleValue' => '12555',
            ],
            'radius' => [
                'name' => 'Umkreis in km',
                'type' => 'number',
                'exampleValue' => 10,
            ],
            'limit_days' => [
                'name' => 'Limit von Tagen',
                'title' => 'Nur Termine innerhalb der nächsten x Tagen',
                'type' => 'number',
                'exampleValue' => 28,
            ],
            'limit_items' => [
                'name' => 'Limit von Terminen',
                'title' => 'Nicht mehr als x Termine',
                'type' => 'number',
                'required' => true,
                'defaultValue' => 20,
            ]
        ]
    ];

    /**
     * Expand the appointments feed, limited to "limit_items" entries.
     */
    public function collectData()
    {
        $limitItems = intval($this->getInput('limit_items'));
        $this->collectExpandableDatas($this->buildAppointmentsURI(), $limitItems);
    }

    /**
     * Enrich one feed item with the details found on its appointment page.
     *
     * When the page lacks the expected ".details", ".datum" or ".adresse"
     * elements the item is returned as received from the feed.
     *
     * @param array $item Feed item; its "uri" entry is the page that gets fetched.
     * @return array Item with rebuilt "title" and "content" and "description" set to null.
     */
    protected function parseItem(array $item)
    {
        $html = getSimpleHTMLDOM($item['uri']);

        $detailsElement = $html->find('.details', 0);
        if (!$detailsElement) {
            return $item;
        }

        $dateElement = $detailsElement->find('.datum', 0);
        $addressElement = $detailsElement->find('.adresse', 0);
        if (!$dateElement || !$addressElement) {
            return $item;
        }

        $dateLines = $this->explodeLines($dateElement->plaintext);
        $addressLines = $this->explodeLines($addressElement->plaintext);

        // Either block may hold fewer lines than expected
        $dateFirst = $dateLines[0] ?? '';
        $dateSecond = $dateLines[1] ?? '';
        $addressFirst = $addressLines[0] ?? '';
        $addressSecond = $addressLines[1] ?? '';

        $infoElement = $detailsElement->find('.angebote > h4 + p', 0);
        $info = $infoElement ? $infoElement->innertext : '';

        $imageElements = $detailsElement->find('.fotos img');

        $item['title'] = $dateFirst . ' ' . $dateSecond . ' ' . $addressFirst . ' - ' . $addressSecond;

        $item['content'] = <<<HTML
        <p><b>{$dateFirst} {$dateSecond}</b></p>
        <p>{$addressElement->innertext}</p>
        <p>{$info}</p>
        HTML;

        foreach ($imageElements as $imageElement) {
            $src = $imageElement->getAttribute('src');
            $item['content'] .= <<<HTML
            <p><img src="{$src}"></p>
            HTML;
        }

        $item['description'] = null;

        return $item;
    }

    /**
     * Human-facing search page for the configured appointments (the feed URL
     * without its ".rss" suffix), or the base URI outside the appointments context.
     */
    public function getURI()
    {
        if ($this->queriedContext === self::CONTEXT_APPOINTMENTS) {
            return str_replace('.rss?', '?', $this->buildAppointmentsURI());
        }
        return parent::getURI();
    }

    /**
     * Build the appointments RSS URL from the bridge inputs.
     *
     * "date_to" is today plus "limit_days" days, or empty when no positive
     * number of days is given.
     *
     * @return string Feed URL with URL-encoded query parameters.
     */
    private function buildAppointmentsURI()
    {
        $term = $this->getInput('term') ?? '';
        $radius = $this->getInput('radius') ?? '';
        $limitDays = intval($this->getInput('limit_days'));
        $dateTo = $limitDays > 0 ? date('Y-m-d', time() + (60 * 60 * 24 * $limitDays)) : '';

        // "term" is free text (postcode or place name), so the query has to be
        // encoded instead of concatenated verbatim.
        $query = http_build_query([
            'date_to' => $dateTo,
            'radius' => $radius,
            'term' => $term,
        ]);

        return self::BASE_URI . '/blutspendetermine/termine.rss?' . $query;
    }

    /**
     * Returns an array of strings, each of which is a substring of string formed by splitting it on boundaries formed by line breaks.
     */
    private function explodeLines(string $text): array
    {
        return array_map('trim', preg_split('/(\s*(\r\n|\n|\r)\s*)+/', $text));
    }
}

104
bridges/DacksnackBridge.php Normal file
View File

@@ -0,0 +1,104 @@
<?PHP
/**
 * Bridge for the front-page news of the magazine Däcksnack.
 *
 * Every `a.main-news-item` teaser on the front page becomes one feed item;
 * the linked article page is fetched (cached) to build the item content.
 */
class DacksnackBridge extends BridgeAbstract
{
    const NAME = 'Däcksnack';
    const URI = 'https://www.tidningendacksnack.se';
    const DESCRIPTION = 'Latest news by the magazine Däcksnack';
    const MAINTAINER = 'ajain-93';

    /** Cache lifetime in seconds used for the front page and the article pages. */
    private const PAGE_CACHE_TTL = 18000;

    /** Swedish month names (lower case) mapped to their month number. */
    private const SWEDISH_MONTHS = [
        'januari' => 1,
        'februari' => 2,
        'mars' => 3,
        'april' => 4,
        'maj' => 5,
        'juni' => 6,
        'juli' => 7,
        'augusti' => 8,
        'september' => 9,
        'oktober' => 10,
        'november' => 11,
        'december' => 12,
    ];

    public function getIcon()
    {
        return self::URI . '/upload/favicon/2591047722.png';
    }

    /**
     * Converts a "<day> <Swedish month name> <year>" string into a timestamp.
     *
     * @param string $dateString Date text, e.g. "12 december 2024".
     * @return int|false Timestamp of that day at 00:00 (default timezone),
     *                   or false when the text is not a valid date of that form.
     */
    private function parseSwedishDates($dateString)
    {
        $pattern = '/^(\d{1,2})\s+(\p{L}+)\s+(\d{4})$/u';
        if (!preg_match($pattern, trim((string) $dateString), $parts)) {
            return false;
        }

        $day = (int) $parts[1];
        $year = (int) $parts[3];
        // Month names are matched case-insensitively; unknown names are rejected
        // instead of silently becoming month 0.
        $month = self::SWEDISH_MONTHS[strtolower($parts[2])] ?? null;
        if ($month === null || !checkdate($month, $day, $year)) {
            return false;
        }

        $dateValue = new DateTime(sprintf('%04d-%02d-%02d', $year, $month, $day));
        $dateValue->setTime(0, 0);

        return $dateValue->getTimestamp();
    }

    /**
     * Returns the trimmed plain text of the first match of $selector below
     * $node, or an empty string when nothing matches.
     */
    private function textOf($node, $selector)
    {
        $found = $node->find($selector, 0);

        return $found ? trim($found->plaintext) : '';
    }

    public function collectData()
    {
        $html = getSimpleHTMLDOMCached(self::URI, self::PAGE_CACHE_TTL);
        if (!$html) {
            returnServerError('Could not request: ' . self::URI);
        }

        foreach ($html->find('a.main-news-item') as $element) {
            $href = $element->getAttribute('href');
            $title = $this->textOf($element, 'h2');
            if (!$href || $title === '') {
                // Without a link and a headline there is nothing useful to emit.
                continue;
            }

            $url = self::URI . $href;
            $category = $this->textOf($element, '.category-tag');
            $published = $this->parseSwedishDates($this->textOf($element, '.published'));

            $article_html = getSimpleHTMLDOMCached($url, self::PAGE_CACHE_TTL);
            if (!$article_html) {
                returnServerError('Could not request: ' . $url);
            }
            $article_content = $article_html->find('#ctl00_ContentPlaceHolder1_NewsArticleVeiw_pnlArticle', 0);

            $author = '';
            $preamble = '';
            $figure_html = '';
            $article_text = '';

            if ($article_content) {
                $author = $this->textOf($article_content, 'span.main-article-author');
                // The byline has the form "Text: <name> Foto: <name>"; keep the writer only.
                if (preg_match('/Text:\s*(.*?)\s*Foto:/', $author, $matches)) {
                    $author = $matches[1];
                }

                $preamble = $this->textOf($article_content, 'h4.main-article-ingress');

                $image = $article_content->find('img.news-image', 0);
                $image_src = $image ? $image->getAttribute('src') : '';
                if ($image_src) {
                    // Quote and escape the attribute so unusual characters in the
                    // address cannot break the markup (existing entities are kept).
                    $figure = htmlspecialchars(self::URI . $image_src, ENT_QUOTES, 'UTF-8', false);
                    $figure_caption = $this->textOf($article_content, '.image-description');
                    $figure_html = '<figure>'
                        . '<img src="' . $figure . '">'
                        . '<figcaption>' . $figure_caption . '</figcaption>'
                        . '</figure>';
                }

                // The article body is the last <div> that carries no class attribute.
                foreach ($article_content->find('div') as $div) {
                    if (!$div->hasAttribute('class')) {
                        $article_text = $div;
                    }
                }
            }

            $content = '<b> [' . $category . '] <i>' . $preamble . '</i></b><br/><br/>';
            $content .= $figure_html;
            $content .= $article_text;

            $item = [
                'uri' => $url,
                'title' => $title,
                'author' => $author,
                'content' => trim($content),
            ];
            if ($published !== false) {
                $item['timestamp'] = $published;
            }

            $this->items[] = $item;
        }
    }
}

View File

@@ -27,11 +27,6 @@ class DagensNyheterDirektBridge extends BridgeAbstract
$url = self::BASEURL . $link;
$title = $element->find('h2', 0)->plaintext;
$author = $element->find('div.ds-byline__titles', 0)->plaintext;
// Debug::log($link);
// Debug::log($datetime);
// Debug::log($title);
// Debug::log($url);
// Debug::log($author);
$article_content = $element->find('div.direkt-post__content', 0);
$article_html = '';

View File

@@ -0,0 +1,96 @@
<?php
/**
 * Bridge for the news listings of dailythanthi.com.
 *
 * One listing page (selected through the `topic` input) is read and every
 * teaser on it is turned into a feed item whose content is taken from the
 * linked article page.
 */
class DailythanthiBridge extends BridgeAbstract
{
    const NAME = 'Dailythanthi';
    const URI = 'https://www.dailythanthi.com';
    const DESCRIPTION = 'Retrieve news from dailythanthi.com';
    const MAINTAINER = 'tillcash';
    const PARAMETERS = [
        [
            'topic' => [
                'name' => 'topic',
                'type' => 'list',
                'values' => [
                    'news' => [
                        'tamilnadu' => 'news/state',
                        'india' => 'news/india',
                        'world' => 'news/world',
                        'sirappu-katturaigal' => 'news/sirappukatturaigal',
                    ],
                    'cinema' => [
                        'news' => 'cinema/cinemanews',
                    ],
                    'sports' => [
                        'sports' => 'sports',
                        'cricket' => 'sports/cricket',
                        'football' => 'sports/football',
                        'tennis' => 'sports/tennis',
                        'hockey' => 'sports/hockey',
                        'other-sports' => 'sports/othersports',
                    ],
                    'devotional' => [
                        'devotional' => 'others/devotional',
                        'aalaya-varalaru' => 'aalaya-varalaru',
                    ],
                ],
            ],
        ],
    ];

    /**
     * Bridge name, suffixed with the capitalised topic key when one is selected.
     */
    public function getName()
    {
        $topicKey = $this->getKey('topic');
        if (!$topicKey) {
            return self::NAME;
        }

        return self::NAME . ' - ' . ucfirst($topicKey);
    }

    /**
     * Reads the listing page of the selected topic and appends one item per teaser.
     */
    public function collectData()
    {
        $listing = getSimpleHTMLDOM(self::URI . '/' . $this->getInput('topic'));

        foreach ($listing->find('div.ListingNewsWithMEDImage') as $teaser) {
            $link = $teaser->find('a', 1);
            $headline = $teaser->find('h3', 0);

            // A teaser needs both its second link and a headline to be usable.
            if ($link && $headline) {
                $articleUrl = self::URI . $link->href;

                $dateNode = $teaser->find('span', 1);
                $dateString = '';
                if ($dateNode) {
                    $dateString = $dateNode->{'data-datestring'};
                }

                $timestamp = '';
                if ($dateString) {
                    $timestamp = $dateString . 'UTC';
                }

                $this->items[] = [
                    'content' => $this->constructContent($articleUrl),
                    'timestamp' => $timestamp,
                    'title' => $headline->plaintext,
                    'uid' => $link->href,
                    'uri' => $articleUrl,
                ];
            }
        }
    }

    /**
     * Fetches an article page (cached) and returns its lead image followed by
     * the story markup, or 'Content Not Found' when the story element is absent.
     */
    private function constructContent($url)
    {
        $page = getSimpleHTMLDOMCached($url);

        $story = $page->find('div.details-content-story', 0);
        if (!$story) {
            return 'Content Not Found';
        }

        // Strip advertisement containers.
        foreach ($story->find('div[id*="_ad"]') as $ad) {
            $ad->outertext = '';
        }

        // Replace the wrapper of every custom <h-img> element with a plain <img>.
        foreach ($story->find('h-img') as $customImage) {
            $customImage->parent->outertext = sprintf('<p><img src="%s"></p>', $customImage->src);
        }

        $leadImage = $page->find('div.main-image-caption-container img', 0);
        if ($leadImage) {
            $leadHtml = '<p>' . $leadImage->outertext . '</p>';
        } else {
            $leadHtml = '';
        }

        return $leadHtml . $story;
    }
}

View File

@@ -9,7 +9,7 @@ class DarkReadingBridge extends FeedExpander
const PARAMETERS = [ [
'feed' => [
'name' => 'Feed',
'name' => 'Feed (NOT IN USE)',
'type' => 'list',
'values' => [
'All Dark Reading Stories' => '000_AllArticles',
@@ -41,17 +41,7 @@ class DarkReadingBridge extends FeedExpander
public function collectData()
{
$feed = $this->getInput('feed');
$feed_splitted = explode('_', $feed);
$feed_id = $feed_splitted[0];
$feed_name = $feed_splitted[1];
if (empty($feed) || !ctype_digit($feed_id) || !preg_match('/[A-Za-z%20\/]/', $feed_name)) {
returnClientError('Invalid feed, please check the "feed" parameter.');
}
$feed_url = $this->getURI() . 'rss_simple.asp';
if ($feed_id != '000') {
$feed_url .= '?f_n=' . $feed_id . '&f_ln=' . $feed_name;
}
$feed_url = 'https://www.darkreading.com/rss.xml';
$limit = $this->getInput('limit') ?? 10;
$this->collectExpandableDatas($feed_url, $limit);
}
@@ -71,7 +61,7 @@ class DarkReadingBridge extends FeedExpander
private function extractArticleContent($article)
{
$content = $article->find('div.article-content', 0)->innertext;
$content = $article->find('div.ContentModule-Wrapper', 0)->innertext;
foreach (
[

View File

@@ -1,40 +0,0 @@
<?php
/**
 * Bridge listing the trailers published on Dave's Trailer Page.
 */
class DavesTrailerPageBridge extends BridgeAbstract
{
    const MAINTAINER = 'johnnygroovy';
    const NAME = 'Daves Trailer Page Bridge';
    const URI = 'https://www.davestrailerpage.co.uk/';
    const DESCRIPTION = 'Last trailers in HD thanks to Dave.';

    /**
     * Walks every table row of the page: centred rows carry a date that
     * applies to the rows after them, all other rows become feed items.
     */
    public function collectData()
    {
        $page = getSimpleHTMLDOM(static::URI)
            or returnClientError('No results for this query.');

        $currentDate = null;

        foreach ($page->find('tr') as $row) {
            if ($row->align != 'center') {
                $this->items[] = [
                    'title' => $row->find('td', 0)->find('b', 0)->plaintext,
                    'content' => $row->find('ul', 1),
                    'uri' => $row->find('a', 3)->getAttribute('href'),
                    // Kept as text; parsed later by FeedItem using strtotime.
                    'timestamp' => $currentDate,
                ];
            } else {
                // Date row: remember it for the rows that follow.
                $currentDate = $row->plaintext;
            }
        }
    }
}

File diff suppressed because it is too large Load Diff

View File

@@ -24,7 +24,8 @@ class DemosBerlinBridge extends BridgeAbstract
public function collectData()
{
$json = getContents('https://www.berlin.de/polizei/service/versammlungsbehoerde/versammlungen-aufzuege/index.php/index/all.json');
$url = 'https://www.berlin.de/polizei/service/versammlungsbehoerde/versammlungen-aufzuege/index.php/index/all.json';
$json = getContents($url);
$jsonFile = json_decode($json, true);
$daysInterval = DateInterval::createFromDateString($this->getInput('days') . ' day');

View File

@@ -78,13 +78,9 @@ class DerpibooruBridge extends BridgeAbstract
public function collectData()
{
$queryJson = json_decode(getContents(
self::URI
. 'api/v1/json/search/images?filter_id='
. urlencode($this->getInput('f'))
. '&q='
. urlencode($this->getInput('q'))
));
$url = self::URI . 'api/v1/json/search/images?filter_id=' . urlencode($this->getInput('f')) . '&q=' . urlencode($this->getInput('q'));
$queryJson = json_decode(getContents($url));
foreach ($queryJson->images as $post) {
$item = [];

View File

@@ -73,12 +73,12 @@ class DeutscheWelleBridge extends FeedExpander
protected function parseItem(array $item)
{
$parsedUrl = parse_url($item['uri']);
unset($parsedUrl['query']);
$url = $this->unparseUrl($parsedUrl);
$parsedUri = parse_url($item['uri']);
unset($parsedUri['query']);
$item['uri'] = $this->unparseUrl($parsedUri);
$page = getSimpleHTMLDOM($url);
$page = defaultLinkTo($page, $url);
$page = getSimpleHTMLDOM($item['uri']);
$page = defaultLinkTo($page, $item['uri']);
$article = $page->find('article', 0);
@@ -112,6 +112,13 @@ class DeutscheWelleBridge extends FeedExpander
$img->height = null;
}
// remove bad img src's added by defaultLinkTo() above
// these images should have src="" and will then use
// the srcset attribute to load the best image for the displayed size
foreach ($article->find('figure > picture > img') as $img) {
$img->src = '';
}
// replace lazy-loaded images
foreach ($article->find('figure.placeholder-image') as $figure) {
$img = $figure->find('img', 0);

View File

@@ -47,7 +47,7 @@ class DiarioDoAlentejoBridge extends BridgeAbstract
}, self::PT_MONTH_NAMES),
array_map(function ($num) {
return sprintf('-%02d-', $num);
}, range(1, sizeof(self::PT_MONTH_NAMES))),
}, range(1, count(self::PT_MONTH_NAMES))),
$element->find('span.date', 0)->innertext
);

View File

@@ -18,12 +18,12 @@ favicon-63b2904a073c89b52b19aa08cebc16a154bcf83fee8ecc6439968b1e6db569c7.ico';
{
$html = getSimpleHTMLDOM(self::URI);
$json = $this->loadEmbeddedJsonData($html);
$data = $this->fetchData($html);
foreach ($html->find('li[id^="screenshot-"]') as $shot) {
$item = [];
$additional_data = $this->findJsonForShot($shot, $json);
$additional_data = $this->findJsonForShot($shot, $data);
if ($additional_data === null) {
$item['uri'] = self::URI . $shot->find('a', 0)->href;
$item['title'] = $shot->find('.shot-title', 0)->plaintext;
@@ -46,9 +46,8 @@ favicon-63b2904a073c89b52b19aa08cebc16a154bcf83fee8ecc6439968b1e6db569c7.ico';
}
}
private function loadEmbeddedJsonData($html)
private function fetchData($html)
{
$json = [];
$scripts = $html->find('script');
foreach ($scripts as $script) {
@@ -69,12 +68,17 @@ favicon-63b2904a073c89b52b19aa08cebc16a154bcf83fee8ecc6439968b1e6db569c7.ico';
$end = strpos($script->innertext, '];') + 1;
// convert JSON to PHP array
$json = json_decode(substr($script->innertext, $start, $end - $start), true);
break;
$json = substr($script->innertext, $start, $end - $start);
try {
// TODO: fix broken json
return Json::decode($json);
} catch (\JsonException $e) {
return [];
}
}
}
return $json;
return [];
}
private function findJsonForShot($shot, $json)

View File

@@ -0,0 +1,86 @@
<?php
/**
 * Bridge for the article teasers listed on Duvar.org.
 *
 * The page selected through `urlsuffix` is read and each `article.news-item`
 * becomes one feed item, up to `postcount` items.
 */
class DuvarOrgBridge extends BridgeAbstract
{
    const NAME = 'Duvar.org - Haberler';
    const MAINTAINER = 'yourname';
    const URI = 'https://duvar.org';
    const DESCRIPTION = 'Returns the latest articles from Duvar.org - News from Turkey and the world';
    const CACHE_TIMEOUT = 3600; // 60min
    const PARAMETERS = [[
        'postcount' => [
            'name' => 'Limit',
            'type' => 'number',
            'required' => true,
            'title' => 'Maximum number of items to return',
            'defaultValue' => 20,
        ],
        'urlsuffix' => [
            'name' => 'URL Suffix',
            'type' => 'list',
            'title' => 'Suffix for the URL to scrape a specific section',
            'defaultValue' => 'Main',
            'values' => [
                'Main' => '',
                'Balanced' => '/uyumlu',
                'Protest' => '/muhalif',
                'Center' => '/merkez',
                'Alternative' => '/alternatif',
                'Global' => '/global',
            ],
        ],
    ]];

    public function collectData()
    {
        $postCount = (int) $this->getInput('postcount');
        $url = self::URI . $this->getInput('urlsuffix');
        $html = getSimpleHTMLDOM($url);

        foreach ($html->find('article.news-item') as $article) {
            // The headline anchor provides both the link and the title.
            $headline = $article->find('h2.news-title a', 0);
            if (!$headline) {
                continue;
            }

            $href = $headline->getAttribute('href');
            if (!$href) {
                continue;
            }
            // Resolve protocol-relative and site-relative links against the site.
            if (strpos($href, '//') === 0) {
                $href = 'https:' . $href;
            } elseif (strpos($href, '/') === 0) {
                $href = self::URI . $href;
            }

            $title = trim($headline->plaintext);

            // Fall back to "now" when the date is missing or cannot be parsed,
            // so the item never carries a boolean false as its timestamp.
            $timeElement = $article->find('time.meta-tag.date-tag', 0);
            $timestamp = $timeElement ? strtotime(trim($timeElement->plaintext)) : false;
            if ($timestamp === false) {
                $timestamp = time();
            }

            $contentElement = $article->find('div.news-description', 0);

            $this->items[] = [
                'uri' => $href,
                'title' => $title,
                'timestamp' => $timestamp,
                'content' => $contentElement ? trim($contentElement->plaintext) : '',
                'uid' => hash('sha256', $title),
            ];

            if (count($this->items) >= $postCount) {
                break;
            }
        }
    }
}

42
bridges/EASeedBridge.php Normal file
View File

@@ -0,0 +1,42 @@
<?php
/**
 * Bridge for the posts listed on the EA Seed blog.
 *
 * Every `ea-tile` inside the first `ea-grid` of the landing page becomes a
 * feed item; the linked post is fetched (cached) to obtain the content.
 */
class EASeedBridge extends BridgeAbstract
{
    const NAME = 'EA Seed Blog';
    const URI = 'https://www.ea.com/seed';
    const DESCRIPTION = 'Posts from the EA Seed blog';
    const MAINTAINER = 'thefranke';
    const CACHE_TIMEOUT = 86400; // 24h

    /**
     * @throws \Exception When the landing page contains no `ea-grid` element.
     */
    public function collectData()
    {
        $url = static::URI;
        $dom = getSimpleHTMLDOM($url);

        $grid = $dom->find('ea-grid', 0);
        if (!$grid) {
            throw new \Exception(sprintf('Unable to find css selector on `%s`', $url));
        }
        $grid = defaultLinkTo($grid, $this->getURI());

        foreach ($grid->find('ea-tile') as $article) {
            $a = $article->find('a', 0);
            $titleElement = $article->find('h3', 0);
            if (!$a || !$titleElement) {
                // A tile without link or headline cannot be turned into an item.
                continue;
            }

            // The first <div> of a tile is read as author, the second as date.
            $authorElement = $article->find('div', 0);
            $dateElement = $article->find('div', 1);

            $entry = getSimpleHTMLDOMCached($a->href, static::CACHE_TIMEOUT * 7 * 4);
            $content = $entry->find('main', 0);

            if ($content) {
                // Remove the header and the trailing section (links to other posts).
                $unwanted = [
                    $content->find('ea-header', 0),
                    $content->find('ea-section', -1),
                ];
                foreach ($unwanted as $element) {
                    if ($element) {
                        $element->outertext = '';
                    }
                }
            }

            $item = [
                'title' => $titleElement->plaintext,
                'author' => $authorElement ? $authorElement->plaintext : '',
                'uri' => $a->href,
                'content' => $content ?: '',
            ];

            // Only set the timestamp when the date text could be parsed.
            $timestamp = $dateElement ? strtotime($dateElement->plaintext) : false;
            if ($timestamp !== false) {
                $item['timestamp'] = $timestamp;
            }

            $this->items[] = $item;
        }
    }
}

View File

@@ -5,15 +5,21 @@ class EBayBridge extends BridgeAbstract
const NAME = 'eBay';
const DESCRIPTION = 'Returns the search results from the eBay auctioning platforms';
const URI = 'https://www.eBay.com';
const MAINTAINER = 'wrobelda';
const MAINTAINER = 'NotsoanoNimus, wrobelda';
const PARAMETERS = [[
'url' => [
'name' => 'Search URL',
'title' => 'Copy the URL from your browser\'s address bar after searching for your items and paste it here',
'pattern' => '^(https:\/\/)?(www.)?ebay\.(com|com\.au|at|be|ca|ch|cn|es|fr|de|com\.hk|ie|it|com\.my|nl|ph|pl|com\.sg|co\.uk).*$',
'pattern' => '^(https:\/\/)?(www\.)?(befr\.|benl\.)?ebay\.(com|com\.au|at|be|ca|ch|cn|es|fr|de|com\.hk|ie|it|com\.my|nl|ph|pl|com\.sg|co\.uk)\/.*$',
'exampleValue' => 'https://www.ebay.com/sch/i.html?_nkw=atom+rss',
'required' => true,
]
],
'includesSearchLink' => [
'name' => 'Include Original Search Link',
'title' => 'Whether or not each feed item should include the original search query link to eBay which was used to find the given listing.',
'type' => 'checkbox',
'defaultValue' => false,
],
]];
public function getURI()
@@ -23,6 +29,10 @@ class EBayBridge extends BridgeAbstract
$uri = trim(preg_replace('/([?&])_sop=[^&]+(&|$)/', '$1', $this->getInput('url')), '?&/');
$uri .= (parse_url($uri, PHP_URL_QUERY) ? '&' : '?') . '_sop=10';
// Ensure the List View is used instead of the Gallery View.
$uri = trim(preg_replace('/[?&]_dmd=[^&]+(&|$)/i', '$1', $uri), '?&/');
$uri .= '&_dmd=1';
return $uri;
} else {
return parent::getURI();
@@ -46,7 +56,7 @@ class EBayBridge extends BridgeAbstract
});
if ($searchQuery) {
return $searchQuery[0];
return 'eBay - ' . $searchQuery[0];
}
return parent::getName();
@@ -61,44 +71,90 @@ class EBayBridge extends BridgeAbstract
$inexactMatches->remove();
}
// Remove "NEW LISTING" labels: we sort by the newest, so this is redundant.
foreach ($html->find('.LIGHT_HIGHLIGHT') as $new_listing_label) {
$new_listing_label->remove();
}
$results = $html->find('ul.srp-results > li.s-item');
foreach ($results as $listing) {
$item = [];
// Remove "NEW LISTING" label, we sort by the newest, so this is redundant
foreach ($listing->find('.LIGHT_HIGHLIGHT') as $new_listing_label) {
$new_listing_label->remove();
// Define a closure to shorten the ugliness of querying the current listing.
$find = function ($query, $altText = '') use ($listing) {
return $listing->find($query, 0)->plaintext ?? $altText;
};
$item['title'] = $find('.s-item__title');
if (!$item['title']) {
// Skip entries where the title cannot be found (for w/e reason).
continue;
}
$listingTitle = $listing->find('.s-item__title', 0);
if ($listingTitle) {
$item['title'] = $listingTitle->plaintext;
}
$subtitle = implode('', $listing->find('.s-item__subtitle'));
$listingUrl = $listing->find('.s-item__link', 0);
if ($listingUrl) {
$item['uri'] = $listingUrl->href;
// It appears there may be more than a single 'subtitle' subclass in the listing. Collate them.
$subtitles = $listing->find('.s-item__subtitle');
if (is_array($subtitles)) {
$subtitle = trim(implode(' ', array_column($subtitles, 'plaintext')));
} else {
$item['uri'] = null;
$subtitle = trim($subtitles->plaintext ?? '');
}
// Get the listing's link and uid.
$itemUri = $listing->find('.s-item__link', 0);
if ($itemUri) {
$item['uri'] = $itemUri->href;
}
if (preg_match('/.*\/itm\/(\d+).*/i', $item['uri'], $matches)) {
$item['uid'] = $matches[1];
}
$priceDom = $listing->find('.s-item__details > .s-item__detail > .s-item__price', 0);
$price = $priceDom->plaintext ?? 'N/A';
// Price should be fetched on its own so we can provide the alt text without complication.
$price = $find('.s-item__price', '[NO PRICE]');
$shippingFree = $listing->find('.s-item__details > .s-item__detail > .s-item__freeXDays', 0)->plaintext ?? '';
$localDelivery = $listing->find('.s-item__details > .s-item__detail > .s-item__localDelivery', 0)->plaintext ?? '';
$logisticsCost = $listing->find('.s-item__details > .s-item__detail > .s-item__logisticsCost', 0)->plaintext ?? '';
// Map a list of dynamic variable names to their subclasses within the listing.
// This is just a bit of sugar to make this cleaner and more maintainable.
$propertyMappings = [
'additionalPrice' => '.s-item__additional-price',
'discount' => '.s-item__discount',
'shippingFree' => '.s-item__freeXDays',
'localDelivery' => '.s-item__localDelivery',
'logisticsCost' => '.s-item__logisticsCost',
'location' => '.s-item__location',
'obo' => '.s-item__formatBestOfferEnabled',
'sellerInfo' => '.s-item__seller-info-text',
'bids' => '.s-item__bidCount',
'timeLeft' => '.s-item__time-left',
'timeEnd' => '.s-item__time-end',
];
$location = $listing->find('.s-item__details > .s-item__detail > .s-item__location', 0)->plaintext ?? '';
foreach ($propertyMappings as $k => $v) {
$$k = $find($v);
}
$sellerInfo = $listing->find('.s-item__seller-info-text', 0)->plaintext ?? '';
// When an additional price detail or discount is defined, create the 'discountLine'.
if ($additionalPrice || $discount) {
$discountLine = '<br /><em>('
. trim($additionalPrice ?? '')
. '; ' . trim($discount ?? '')
. ')</em>';
} else {
$discountLine = '';
}
// Prepend the time-left info with a comma if the right details were found.
$timeInfo = trim($timeLeft . ' ' . $timeEnd);
if ($timeInfo) {
$timeInfo = ', ' . $timeInfo;
}
// Set the listing type.
if ($bids) {
$listingTypeDetails = "Auction: {$bids}{$timeInfo}";
} else {
$listingTypeDetails = 'Buy It Now';
}
// Acquire the listing's primary image and atach it.
$image = $listing->find('.s-item__image-wrapper > img', 0);
if ($image) {
// Not quite sure why append fragment here
@@ -106,11 +162,23 @@ class EBayBridge extends BridgeAbstract
$item['enclosures'] = [$imageUrl];
}
// Include the original search link, if specified.
if ($this->getInput('includesSearchLink')) {
$searchLink = '<p><small><a target="_blank" href="' . e($this->getURI()) . '">View Search</a></small></p>';
} else {
$searchLink = '';
}
// Build the final item's content to display and add the item onto the list.
$item['content'] = <<<CONTENT
<p>$sellerInfo $location</p>
<p><span style="font-weight:bold">$price</span> $shippingFree $localDelivery $logisticsCost<span></span></p>
<p>$subtitle</p>
<p><strong>$price</strong> $obo ($listingTypeDetails)
$discountLine
<br /><small>$shippingFree $localDelivery $logisticsCost</small></p>
<p>{$subtitle}</p>
$searchLink
CONTENT;
$this->items[] = $item;
}
}

View File

@@ -50,7 +50,9 @@ class EZTVBridge extends BridgeAbstract
$eztv_uri = $this->getEztvUri();
$ids = explode(',', trim($this->getInput('ids')));
foreach ($ids as $id) {
$data = json_decode(getContents(sprintf('%s/api/get-torrents?imdb_id=%s', $eztv_uri, $id)));
$url = sprintf('%s/api/get-torrents?imdb_id=%s', $eztv_uri, $id);
$json = getContents($url);
$data = json_decode($json);
if (!isset($data->torrents)) {
// No results
continue;

View File

@@ -8,6 +8,12 @@ class EconomistBridge extends FeedExpander
const CACHE_TIMEOUT = 3600; //1hour
const DESCRIPTION = 'Returns the latest articles for the selected category';
const CONFIGURATION = [
'cookie' => [
'required' => false,
]
];
const PARAMETERS = [
'global' => [
'limit' => [
@@ -99,7 +105,24 @@ class EconomistBridge extends FeedExpander
protected function parseItem(array $item)
{
$dom = getSimpleHTMLDOM($item['uri']);
$headers = [];
if ($this->getOption('cookie')) {
$headers = [
'Authority: www.economist.com',
'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
'Accept-language: en-US,en;q=0.9',
'Cache-control: max-age=0',
'Cookie: ' . $this->getOption('cookie'),
'Upgrade-insecure-requests: 1',
'User-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36'
];
}
try {
$dom = getSimpleHTMLDOM($item['uri'], $headers);
} catch (Exception $e) {
$item['content'] = $e->getMessage();
return $item;
}
$article = $dom->find('#new-article-template', 0);
if ($article == null) {
@@ -204,6 +227,15 @@ class EconomistBridge extends FeedExpander
foreach ($elem->find('a.ds-link-with-arrow-icon') as $a) {
$a->parent->removeChild($a);
}
// Sections like "Leaders on day X"
foreach ($elem->find('div[data-tracking-id=content-well-chapter-list]') as $div) {
$div->parent->removeChild($div);
}
// "Explore more" section
foreach ($elem->find('h3[id=article-tags]') as $h3) {
$div = $h3->parent;
$div->parent->removeChild($div);
}
// The Economist puts infographics into iframes, which doesn't
// work in any of my readers. So this replaces iframes with

View File

@@ -9,6 +9,12 @@ class EconomistWorldInBriefBridge extends BridgeAbstract
const CACHE_TIMEOUT = 3600; // 1 hour
const DESCRIPTION = 'Returns stories from the World in Brief section';
const CONFIGURATION = [
'cookie' => [
'required' => false,
]
];
const PARAMETERS = [
'' => [
'splitGobbets' => [
@@ -41,19 +47,34 @@ class EconomistWorldInBriefBridge extends BridgeAbstract
public function collectData()
{
$html = getSimpleHTMLDOM(self::URI);
$gobbets = $html->find('._gobbets', 0);
$headers = [];
if ($this->getOption('cookie')) {
$headers = [
'Authority: www.economist.com',
'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
'Accept-language: en-US,en;q=0.9',
'Cache-control: max-age=0',
'Cookie: ' . $this->getOption('cookie'),
'Upgrade-insecure-requests: 1',
'User-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36'
];
}
$html = getSimpleHTMLDOM(self::URI, $headers);
$gobbets = $html->find('p[data-component="the-world-in-brief-paragraph"]');
if ($this->getInput('splitGobbets') == 1) {
$this->splitGobbets($gobbets);
} else {
$this->mergeGobbets($gobbets);
};
if ($this->getInput('agenda') == 1) {
$articles = $html->find('._articles', 0);
$this->collectArticles($articles);
$articles = $html->find('div[data-test-id="chunks"] > div > div', 0);
if ($articles != null) {
$this->collectArticles($articles);
}
}
if ($this->getInput('quote') == 1) {
$quote = $html->find('._quote-container', 0);
$quote = $html->find('blockquote[data-test-id="inspirational-quote"]', 0);
$this->addQuote($quote);
}
}
@@ -63,7 +84,7 @@ class EconomistWorldInBriefBridge extends BridgeAbstract
$today = new Datetime();
$today->setTime(0, 0, 0, 0);
$limit = $this->getInput('limit');
foreach ($gobbets->find('._gobbet') as $gobbet) {
foreach ($gobbets as $gobbet) {
$title = $gobbet->plaintext;
$match = preg_match('/[\.,]/', $title, $matches, PREG_OFFSET_CAPTURE);
if ($match > 0) {
@@ -89,7 +110,7 @@ class EconomistWorldInBriefBridge extends BridgeAbstract
$today = new Datetime();
$today->setTime(0, 0, 0, 0);
$contents = '';
foreach ($gobbets->find('._gobbet') as $gobbet) {
foreach ($gobbets as $gobbet) {
$contents .= "<p>{$gobbet->innertext}";
}
$this->items[] = [
@@ -106,10 +127,14 @@ class EconomistWorldInBriefBridge extends BridgeAbstract
$i = 0;
$today = new Datetime();
$today->setTime(0, 0, 0, 0);
foreach ($articles->find('._article') as $article) {
$title = $article->find('._headline', 0)->plaintext;
$image = $article->find('._main-image', 0);
$content = $article->find('._content', 0);
foreach ($articles->children() as $element) {
if ($element->tag != 'div') {
continue;
}
$image = $element->find('figure', 0);
$title = $element->find('h3', 0)->plaintext;
$content = $element->find('h3', 0)->parent();
$content->find('h3', 0)->outertext = '';
$res_content = '';
if ($image != null && $this->getInput('agendaPictures') == 1) {

View File

@@ -45,14 +45,14 @@ class EdfPricesBridge extends BridgeAbstract
}
// colors
$ulDom = $html->find('#tarif-de-l-offre-edf-tempo-current-date-html-year', 0)->nextSibling()->nextSibling()->nextSibling();
$ulDom = $html->find('#tarif-de-l-offre-tempo-edf-template-date-now-y', 0)->nextSibling()->nextSibling()->nextSibling();
$elementsDom = $ulDom->find('li');
if ($elementsDom && count($elementsDom) === 3) {
foreach ($elementsDom as $elementDom) {
$item = [];
$matches = [];
preg_match_all('/Jour (.*) : Heures (.*) : (.*) € \/ Heures (.*) : (.*) €/um', $elementDom->innertext, $matches, PREG_SET_ORDER, 0);
preg_match_all('/Jour (.*) : Heures (.*) : (.*)&nbsp;€ \/ Heures (.*) : (.*)&nbsp;€/um', $elementDom->innertext, $matches, PREG_SET_ORDER, 0);
if ($matches && count($matches[0]) === 6) {
for ($i = 0; $i < 2; $i++) {

View File

@@ -34,11 +34,9 @@ class ElloBridge extends BridgeAbstract
];
if (!empty($this->getInput('u'))) {
$postData = getContents(self::URI . 'api/v2/users/~' . urlencode($this->getInput('u')) . '/posts', $header) or
returnServerError('Unable to query Ello API.');
$postData = getContents(self::URI . 'api/v2/users/~' . urlencode($this->getInput('u')) . '/posts', $header);
} else {
$postData = getContents(self::URI . 'api/v2/posts?terms=' . urlencode($this->getInput('s')), $header) or
returnServerError('Unable to query Ello API.');
$postData = getContents(self::URI . 'api/v2/posts?terms=' . urlencode($this->getInput('s')), $header);
}
$postData = json_decode($postData);
@@ -117,7 +115,7 @@ class ElloBridge extends BridgeAbstract
$apiKey = $this->cache->get($cacheKey);
if (!$apiKey) {
$keyInfo = getContents(self::URI . 'api/webapp-token') or returnServerError('Unable to get token.');
$keyInfo = getContents(self::URI . 'api/webapp-token');
$apiKey = json_decode($keyInfo)->token->access_token;
$ttl = 60 * 60 * 20;
$this->cache->set($cacheKey, $apiKey, $ttl);

View File

@@ -0,0 +1,74 @@
<?php
/**
 * Bridge listing the games currently offered for free on the Epic Games store.
 *
 * The free-games promotions endpoint is queried for the selected locale and
 * country; every element with an active promotional offer becomes one item.
 */
class EpicGamesFreeBridge extends BridgeAbstract
{
    const NAME = 'Epic Games Free Games';
    const MAINTAINER = 'phantop';
    const URI = 'https://store.epicgames.com/';
    const DESCRIPTION = 'Returns the latest free games from Epic Games';
    const PARAMETERS = [ [
        'locale' => [
            'name' => 'Language',
            'type' => 'list',
            'values' => [
                'English' => 'en-US',
                'العربية' => 'ar',
                'Deutsch' => 'de',
                'Español (Spain)' => 'es-ES',
                'Español (LA)' => 'es-MX',
                'Français' => 'fr',
                'Italiano' => 'it',
                '日本語' => 'ja',
                '한국어' => 'ko',
                'Polski' => 'pl',
                'Português (Brasil)' => 'pt-BR',
                'Русский' => 'ru',
                'ไทย' => 'th',
                'Türkçe' => 'tr',
                '简体中文' => 'zh-CN',
                '繁體中文' => 'zh-Hant',
            ],
            'title' => 'Language for game information',
            'defaultValue' => 'en-US',
        ],
        'country' => [
            'name' => 'Country',
            'title' => 'Country store to check for deals',
            'defaultValue' => 'US',
        ]
    ]];

    /**
     * @throws \Exception When the response does not contain the expected element list.
     */
    public function collectData()
    {
        $url = 'https://store-site-backend-static.ak.epicgames.com/freeGamesPromotions?';
        $params = [
            'locale' => $this->getInput('locale'),
            'country' => $this->getInput('country'),
            'allowCountries' => $this->getInput('country'),
        ];
        $url .= http_build_query($params);

        $json = Json::decode(getContents($url));
        $data = $json['data']['Catalog']['searchStore']['elements'] ?? null;
        if (!is_array($data)) {
            throw new \Exception('Unexpected response: no store elements found');
        }

        foreach ($data as $element) {
            // Only elements with a currently running promotional offer are listed.
            $offer = $element['promotions']['promotionalOffers'][0]['promotionalOffers'][0] ?? null;
            if ($offer === null) {
                continue;
            }

            $item = [
                'author' => $element['seller']['name'] ?? '',
                'content' => $element['description'] ?? '',
                'enclosures' => array_column($element['keyImages'] ?? [], 'url'),
                'title' => $element['title'] ?? '',
                // The link must be stored under 'uri'; a 'url' key is not
                // treated as the item link.
                'uri' => parent::getURI() . $this->getInput('locale') . '/p/' . ($element['urlSlug'] ?? ''),
            ];

            $timestamp = isset($offer['startDate']) ? strtotime($offer['startDate']) : false;
            if ($timestamp !== false) {
                $item['timestamp'] = $timestamp;
            }

            $this->items[] = $item;
        }
    }

    public function getURI()
    {
        $uri = parent::getURI() . $this->getInput('locale') . '/free-games';
        return $uri;
    }
}

View File

@@ -31,7 +31,7 @@ class FDroidBridge extends BridgeAbstract
CURLOPT_NOBODY => true,
];
$reponse = getContents($url, [], $curlOptions, true);
$lastModified = $reponse['headers']['last-modified'][0] ?? null;
$lastModified = $reponse->getHeader('last-modified');
$timestamp = strtotime($lastModified ?? 'today');
return $timestamp;
}

View File

@@ -14,7 +14,7 @@ class FDroidRepoBridge extends BridgeAbstract
'name' => 'Repository URL',
'title' => 'Usually ends with /repo/',
'required' => true,
'exampleValue' => 'https://srv.tt-rss.org/fdroid/repo'
'exampleValue' => 'https://molly.im/fdroid/foss/fdroid/repo'
]
],
'Latest Updates' => [
@@ -35,7 +35,7 @@ class FDroidRepoBridge extends BridgeAbstract
'package' => [
'name' => 'Package Identifier',
'required' => true,
'exampleValue' => 'org.fox.ttrss'
'exampleValue' => 'im.molly.app'
]
]
];
@@ -45,11 +45,7 @@ class FDroidRepoBridge extends BridgeAbstract
public function collectData()
{
if (!extension_loaded('zip')) {
throw new \Exception('FDroidRepoBridge requires the php-zip extension');
}
$this->repo = $this->getRepo();
$this->repo = $this->fetchData();
switch ($this->queriedContext) {
case 'Latest Updates':
$this->getAllUpdates();
@@ -58,63 +54,15 @@ class FDroidRepoBridge extends BridgeAbstract
$this->getPackage($this->getInput('package'));
break;
default:
returnServerError('Unimplemented Context (collectData)');
throw new \Exception('Unimplemented Context (collectData)');
}
}
public function getURI()
{
if (empty($this->queriedContext)) {
return parent::getURI();
}
$url = rtrim($this->GetInput('url'), '/');
return strstr($url, '?', true) ?: $url;
}
public function getName()
{
if (empty($this->queriedContext)) {
return parent::getName();
}
$name = $this->repo['repo']['name'];
switch ($this->queriedContext) {
case 'Latest Updates':
return $name;
case 'Follow Package':
return $this->getInput('package') . ' - ' . $name;
default:
returnServerError('Unimplemented Context (getName)');
}
}
private function getRepo()
private function fetchData()
{
$url = $this->getURI();
// Get repo information (only available as JAR)
$jar = getContents($url . '/index-v1.jar');
$jar_loc = tempnam(sys_get_temp_dir(), '');
file_put_contents($jar_loc, $jar);
// JAR files are specially formatted ZIP files
$jar = new \ZipArchive();
if ($jar->open($jar_loc) !== true) {
unlink($jar_loc);
throw new \Exception('Failed to extract archive');
}
// Get file pointer to the relevant JSON inside
$fp = $jar->getStream('index-v1.json');
if (!$fp) {
returnServerError('Failed to get file pointer');
}
$data = json_decode(stream_get_contents($fp), true);
fclose($fp);
$jar->close();
unlink($jar_loc);
$json = getContents($url . '/index-v1.json');
$data = Json::decode($json);
return $data;
}
@@ -158,9 +106,9 @@ class FDroidRepoBridge extends BridgeAbstract
$summary = $lang['summary'] ?? $app['summary'] ?? '';
$description = markdownToHtml(trim($lang['description'] ?? $app['description'] ?? 'None'));
$whatsNew = markdownToHtml(trim($lang['whatsNew'] ?? 'None'));
$website = $this->link($lang['webSite'] ?? $app['webSite'] ?? $app['authorWebSite'] ?? null);
$source = $this->link($app['sourceCode'] ?? null);
$issueTracker = $this->link($app['issueTracker'] ?? null);
$website = $this->createAnchor($lang['webSite'] ?? $app['webSite'] ?? $app['authorWebSite'] ?? null);
$source = $this->createAnchor($app['sourceCode'] ?? null);
$issueTracker = $this->createAnchor($app['issueTracker'] ?? null);
$license = $app['license'] ?? 'None';
$item['content'] = <<<EOD
{$icon}
@@ -182,7 +130,7 @@ EOD;
private function getPackage($package)
{
if (!isset($this->repo['packages'][$package])) {
returnClientError('Invalid Package Name');
throw new \Exception('Invalid Package Name');
}
$package = $this->repo['packages'][$package];
@@ -192,7 +140,7 @@ EOD;
$item['uri'] = $this->getURI() . '/' . $version['apkName'];
$item['title'] = $version['versionName'];
$item['timestamp'] = date(DateTime::ISO8601, (int) ($version['added'] / 1000));
$item['uid'] = $version['versionCode'];
$item['uid'] = (string) $version['versionCode'];
$size = round($version['size'] / 1048576, 1); // Bytes -> MB
$sdk_link = 'https://developer.android.com/studio/releases/platforms';
$item['content'] = <<<EOD
@@ -208,11 +156,42 @@ EOD;
}
}
private function link($url)
/**
 * Returns the repository URL supplied through the 'url' input with trailing
 * slashes removed and everything from the first '?' onwards dropped.
 * Falls back to the parent implementation while no context is queried.
 */
public function getURI()
{
    if (!empty($this->queriedContext)) {
        $repoUrl = rtrim($this->getInput('url'), '/');
        // strstr() yields false when there is no '?', so fall back to the full URL.
        $withoutQuery = strstr($repoUrl, '?', true);
        return $withoutQuery ? $withoutQuery : $repoUrl;
    }
    return parent::getURI();
}
/**
 * Builds the feed name from the repository name stored in $this->repo.
 *
 * 'Latest Updates' uses the repository name alone; 'Follow Package'
 * prefixes it with the requested package identifier.
 *
 * @throws \Exception for any other queried context
 */
public function getName()
{
    if (empty($this->queriedContext)) {
        return parent::getName();
    }

    $repoName = $this->repo['repo']['name'];

    // Loose comparison on purpose: it mirrors the semantics of a switch statement.
    if ($this->queriedContext == 'Latest Updates') {
        return $repoName;
    }
    if ($this->queriedContext == 'Follow Package') {
        return $this->getInput('package') . ' - ' . $repoName;
    }

    throw new \Exception('Unimplemented Context (getName)');
}
private function createAnchor($url)
{
if (empty($url)) {
return null;
}
return '<a href="' . $url . '">' . $url . '</a>';
return sprintf('<a href="%s">%s</a>', $url, $url);
}
}

View File

@@ -64,6 +64,7 @@ TEXT;
$this->collectExpandableDatas($feed);
} catch (HttpException $e) {
$this->logger->warning(sprintf('Exception in FeedMergeBridge: %s', create_sane_exception_message($e)));
// This feed item might be spammy. Considering dropping it.
$this->items[] = [
'title' => 'RSS-Bridge: ' . $e->getMessage(),
// Give current time so it sorts to the top
@@ -71,7 +72,7 @@ TEXT;
];
continue;
} catch (\Exception $e) {
if (str_starts_with($e->getMessage(), 'Unable to parse xml')) {
if (str_starts_with($e->getMessage(), 'Failed to parse xml')) {
// Allow this particular exception from FeedExpander
$this->logger->warning(sprintf('Exception in FeedMergeBridge: %s', create_sane_exception_message($e)));
continue;
@@ -83,6 +84,8 @@ TEXT;
}
}
// If $this->items is empty we should consider throw exception here
// Sort by timestamp descending
usort($this->items, function ($a, $b) {
$t1 = $a['timestamp'] ?? $a['uri'] ?? $a['title'];

View File

@@ -187,7 +187,6 @@ class FicbookBridge extends BridgeAbstract
$fixed_date = str_replace(' г.', '', $fixed_date);
if ($fixed_date === $date) {
Debug::log('Unable to fix date: ' . $date);
return null;
}

View File

@@ -15,6 +15,12 @@ class FilterBridge extends FeedExpander
'exampleValue' => 'https://lorem-rss.herokuapp.com/feed?unit=day',
'required' => true,
],
'name' => [
'name' => 'Feed name (optional)',
'type' => 'text',
'exampleValue' => 'My feed',
'required' => false,
],
'filter' => [
'name' => 'Filter (regular expression!!!)',
'required' => false,
@@ -77,7 +83,7 @@ class FilterBridge extends FeedExpander
{
$url = $this->getInput('url');
if (!Url::validate($url)) {
returnClientError('The url parameter must either refer to http or https protocol.');
throw new \Exception('The url parameter must either refer to http or https protocol.');
}
$this->collectExpandableDatas($this->getURI());
}
@@ -158,11 +164,18 @@ class FilterBridge extends FeedExpander
public function getURI()
{
$url = $this->getInput('url');
if (empty($url)) {
$url = parent::getURI();
if ($url) {
return $url;
}
return parent::getURI();
}
return $url;
/**
 * Returns the value of the 'name' input when it is truthy, otherwise the
 * parent's name.
 */
public function getName()
{
    $customName = $this->getInput('name');

    return $customName ? $customName : parent::getName();
}
}

View File

@@ -44,8 +44,6 @@ class FolhaDeSaoPauloBridge extends FeedExpander
$item['content'] = $text;
$item['uri'] = explode('*', $item['uri'])[1];
}
} else {
Debug::log('???: ' . $item['uri']);
}
} else {
$item['uri'] = explode('*', $item['uri'])[1];
@@ -58,13 +56,11 @@ class FolhaDeSaoPauloBridge extends FeedExpander
{
$feed_input = $this->getInput('feed');
if (substr($feed_input, 0, strlen(self::URI)) === self::URI) {
Debug::log('Input:: ' . $feed_input);
$feed_url = $feed_input;
} else {
/* TODO: prepend `/` if missing */
$feed_url = self::URI . '/' . $this->getInput('feed');
}
Debug::log('URL: ' . $feed_url);
$limit = $this->getInput('amount');
$this->collectExpandableDatas($feed_url, $limit);
}

View File

@@ -0,0 +1,25 @@
<?php
/**
 * Builds a feed from the investigations list exposed by the
 * forensic-architecture.org JSON endpoint.
 */
class ForensicArchitectureBridge extends BridgeAbstract
{
    const NAME = 'Forensic Architecture';
    const URI = 'https://forensic-architecture.org/';
    const DESCRIPTION = 'Generates content feeds from forensic-architecture.org';
    const MAINTAINER = 'tillcash';

    /**
     * Fetches the investigations endpoint and appends one feed item per entry.
     *
     * @throws \Exception when the response cannot be decoded or has no
     *                    iterable "investigations" member
     */
    public function collectData()
    {
        $url = 'https://forensic-architecture.org/api/fa/v1/investigations';
        $jsonData = json_decode(getContents($url));

        // json_decode() returns null on malformed input; fail with a clear
        // message instead of iterating over a property of null.
        if (!is_object($jsonData) || !isset($jsonData->investigations)) {
            throw new \Exception('Unexpected response from the investigations API');
        }

        $investigations = $jsonData->investigations;
        if (!is_array($investigations) && !is_object($investigations)) {
            throw new \Exception('Unexpected response from the investigations API');
        }

        foreach ($investigations as $investigation) {
            $this->items[] = [
                'content' => $investigation->abstract,
                'timestamp' => $investigation->publication_date,
                'title' => $investigation->title,
                // Emit the identifier as a string regardless of its JSON type.
                'uid' => (string) $investigation->id,
                'uri' => self::URI . 'investigation/' . $investigation->slug,
            ];
        }
    }
}

View File

@@ -0,0 +1,78 @@
<?php
/**
 * Scrapes the "Exklusiv" article listing of fragdenstaat.de and expands each
 * teaser with the content of the linked article page.
 */
class FragDenStaatBridge extends BridgeAbstract
{
    const MAINTAINER = 'swofl';
    const NAME = 'FragDenStaat';
    const URI = 'https://fragdenstaat.de';
    const CACHE_TIMEOUT = 2 * 60 * 60; // 2h
    const DESCRIPTION = 'Get latest blog posts from FragDenStaat Exklusiv';
    const PARAMETERS = [ [
        'qLimit' => [
            'name' => 'Query Limit',
            'title' => 'Amount of articles to query',
            'type' => 'number',
            'defaultValue' => 5,
        ],
    ] ];

    /**
     * Extracts title, uri, enclosure, uid and timestamp from one teaser element.
     *
     * @param object $teaser An <article> element from the listing page.
     * @return array Partial feed item; 'enclosures' is empty when the teaser has no image.
     */
    protected function parseTeaser($teaser)
    {
        $result = [];

        $header = $teaser->find('h3 > a', 0);
        $result['title'] = $header->plaintext;
        $result['uri'] = static::URI . $header->href;

        $result['enclosures'] = [];
        $image = $teaser->find('img', 0);
        if ($image) {
            $result['enclosures'][] = $image->src;
        }

        $result['uid'] = hash('sha256', $result['title']);
        $result['timestamp'] = strtotime($teaser->find('time', 0)->getAttribute('datetime'));

        return $result;
    }

    /**
     * Collects at most 'qLimit' (capped at 12) articles from the listing page.
     */
    public function collectData()
    {
        $html = getSimpleHTMLDOM(self::URI . '/artikel/exklusiv/');

        // Clamp to 0..12; a negative value must not reach array_slice() as a length.
        $queryLimit = max(0, min(12, (int) $this->getInput('qLimit')));

        // Slice instead of indexing 0..$queryLimit-1 so that a listing with
        // fewer teasers than requested does not produce undefined offsets.
        $teaserElements = array_slice($html->find('article'), 0, $queryLimit);

        foreach ($teaserElements as $teaserElement) {
            $article = $this->parseTeaser($teaserElement);

            $articleHtml = getSimpleHTMLDOMCached($article['uri'], static::CACHE_TIMEOUT * 6);
            $articleCore = $articleHtml->find('article.blog-article', 0);
            if (!$articleCore) {
                // Article body not found: keep the teaser data rather than failing the whole feed.
                $this->items[] = $article;
                continue;
            }

            $content = '';
            $lead = $articleCore->find('div.lead > p', 0);
            if ($lead) {
                $content .= '<h2>' . $lead->innertext . '</h2>';
            }

            foreach ($articleCore->find('div.blog-content > p, div.blog-content > h3') as $paragraph) {
                $content .= $paragraph->outertext;
            }

            if (isset($article['enclosures'][0])) {
                $content = '<img src="' . $article['enclosures'][0] . '"/>' . $content;
            }
            $article['content'] = $content;

            $article['author'] = '';
            foreach ($articleCore->find('a[rel="author"]') as $author) {
                $article['author'] .= $author->innertext . ', ';
            }
            $article['author'] = rtrim($article['author'], ', ');

            $this->items[] = $article;
        }
    }
}

View File

@@ -32,7 +32,7 @@ class FunkBridge extends BridgeAbstract
$url .= '?size=' . $this->getInput('max');
}
$jsonString = getContents($url) or returnServerError('No contents received!');
$jsonString = getContents($url);
$json = json_decode($jsonString, true);
foreach ($json['list'] as $element) {

View File

@@ -676,7 +676,7 @@ class FurAffinityBridge extends BridgeAbstract
$name = parent::getName();
if ($this->getOption('aCookie') !== null) {
$username = $this->loadCacheValue('username');
if ($username !== null) {
if ($username) {
$name = $username . '\'s ' . parent::getName();
}
}

View File

@@ -31,7 +31,7 @@ class GBAtempBridge extends BridgeAbstract
$img = $this->findItemImage($newsItem, 'a.news_image');
$time = $this->findItemDate($newsItem);
$author = $newsItem->find('a.username', 0)->plaintext;
$title = $this->decodeHtmlEntities($newsItem->find('h3.news_title', 0)->plaintext);
$title = $this->decodeHtmlEntities($newsItem->find('h2.news_title', 0)->plaintext);
$content = $this->fetchPostContent($url, self::URI);
$this->items[] = $this->buildItem($url, $title, $author, $time, $img, $content);
unset($newsItem); // Some items are heavy, freeing the item proactively helps saving memory
@@ -41,7 +41,7 @@ class GBAtempBridge extends BridgeAbstract
foreach ($html->find('li.portal_review') as $reviewItem) {
$url = urljoin(self::URI, $reviewItem->find('a.review_boxart', 0)->href);
$img = $this->findItemImage($reviewItem, 'a.review_boxart');
$title = $this->decodeHtmlEntities($reviewItem->find('h2.review_title', 0)->plaintext);
$title = $this->decodeHtmlEntities($reviewItem->find('div.review_title', 0)->find('h2', 0)->plaintext);
$content = getSimpleHTMLDOMCached($url);
$author = $content->find('span.author--name', 0)->plaintext;
$time = $this->findItemDate($content);

View File

@@ -0,0 +1,164 @@
<?php
use Facebook\WebDriver\Exception\NoSuchElementException;
use Facebook\WebDriver\Remote\RemoteWebElement;
use Facebook\WebDriver\WebDriverBy;
use Facebook\WebDriver\WebDriverExpectedCondition;
/**
 * Walks through the GULP project search result pages with a WebDriver
 * session and turns each 'app-project-view' element into a feed item.
 */
class GULPProjekteBridge extends WebDriverAbstract
{
    const NAME = 'GULP Projekte';
    const URI = 'https://www.gulp.de/gulp2/g/projekte';
    const DESCRIPTION = 'Projektsuche';
    const MAINTAINER = 'hleskien';

    const MAXITEMS = 60;

    /**
     * Adds accept language german to the Chrome Options.
     *
     * @return Facebook\WebDriver\Chrome\ChromeOptions
     */
    protected function getBrowserOptions()
    {
        $chromeOptions = parent::getBrowserOptions();
        $chromeOptions->addArguments(['--accept-lang=de']);
        return $chromeOptions;
    }

    /**
     * Waits for the cookie banner's reject button, clicks it and waits until
     * the button is no longer visible.
     *
     * @throws Facebook\WebDriver\Exception\NoSuchElementException
     * @throws Facebook\WebDriver\Exception\TimeoutException
     */
    protected function clickAwayCookieBanner()
    {
        $this->getDriver()->wait()->until(WebDriverExpectedCondition::visibilityOfElementLocated(WebDriverBy::id('onetrust-reject-all-handler')));
        $buttonRejectCookies = $this->getDriver()->findElement(WebDriverBy::id('onetrust-reject-all-handler'));
        $buttonRejectCookies->click();
        $this->getDriver()->wait()->until(WebDriverExpectedCondition::invisibilityOfElementLocated(WebDriverBy::id('onetrust-reject-all-handler')));
    }

    /**
     * Clicks the paginator's "next page" link and waits until a link with the
     * previous href is no longer present.
     *
     * @throws Facebook\WebDriver\Exception\NoSuchElementException
     * @throws Facebook\WebDriver\Exception\TimeoutException
     */
    protected function clickNextPage()
    {
        $nextPage = $this->getDriver()->findElement(WebDriverBy::xpath('//app-linkable-paginator//li[@id="next-page"]/a'));
        $href = $nextPage->getAttribute('href');
        $nextPage->click();
        $this->getDriver()->wait()->until(WebDriverExpectedCondition::not(
            WebDriverExpectedCondition::presenceOfElementLocated(
                WebDriverBy::xpath('//app-linkable-paginator//li[@id="next-page"]/a[@href="' . $href . '"]')
            )
        ));
    }

    /**
     * Returns the uri of the 'Projektanbieter' logo or false if there is
     * no logo present in the item.
     *
     * @return string | false
     */
    protected function getLogo(RemoteWebElement $item)
    {
        try {
            $logo = $item->findElement(WebDriverBy::tagName('img'))->getAttribute('src');
            if (str_starts_with($logo, 'http')) {
                // different domain
                return $logo;
            } else {
                // relative path: resolve against self::URI without its last path segment
                $remove = substr(self::URI, strrpos(self::URI, '/') + 1);
                return substr(self::URI, 0, -strlen($remove)) . $logo;
            }
        } catch (NoSuchElementException $e) {
            return false;
        }
    }

    /**
     * Converts a string like "vor einigen Minuten" into a reasonable timestamp.
     * Long and complicated, but we don't want to be more specific than
     * the information we have available.
     *
     * @throws Exception If the DateInterval can't be parsed.
     * @throws UnexpectedValueException If the text names no known time unit.
     */
    protected function getTimestamp(string $timeAgo): int
    {
        $dateTime = new DateTime();
        $dateArray = explode(' ', $dateTime->format('Y m d H i s'));

        // The quantity is the second word; tolerate texts without one.
        $quantityStr = explode(' ', $timeAgo)[1] ?? '';
        // convert possible word into a number
        if (in_array($quantityStr, ['einem', 'einer', 'einigen'])) {
            $quantity = 1;
        } else {
            $quantity = intval($quantityStr);
        }

        // subtract time ago + inferior units for lower precision
        if (str_contains($timeAgo, 'Sekunde')) {
            $interval = new DateInterval('PT' . $quantity . 'S');
        } elseif (str_contains($timeAgo, 'Minute')) {
            $interval = new DateInterval('PT' . $quantity . 'M' . $dateArray[5] . 'S');
        } elseif (str_contains($timeAgo, 'Stunde')) {
            $interval = new DateInterval('PT' . $quantity . 'H' . $dateArray[4] . 'M' . $dateArray[5] . 'S');
        } elseif (str_contains($timeAgo, 'Tag')) {
            $interval = new DateInterval('P' . $quantity . 'DT' . $dateArray[3] . 'H' . $dateArray[4] . 'M' . $dateArray[5] . 'S');
        } else {
            throw new UnexpectedValueException($timeAgo);
        }

        $dateTime = $dateTime->sub($interval);
        return $dateTime->getTimestamp();
    }

    /**
     * The main loop which clicks through search result pages and puts
     * the content into the $items array.
     *
     * Paging stops once at least MAXITEMS items were collected, or when no
     * "next page" link can be found after some items were already gathered.
     *
     * @throws Facebook\WebDriver\Exception\NoSuchElementException
     * @throws Facebook\WebDriver\Exception\TimeoutException
     */
    public function collectData()
    {
        parent::collectData();

        try {
            $this->clickAwayCookieBanner();
            $this->setIcon($this->getDriver()->findElement(WebDriverBy::xpath('//link[@rel="shortcut icon"]'))->getAttribute('href'));

            while (true) {
                $items = $this->getDriver()->findElements(WebDriverBy::tagName('app-project-view'));
                foreach ($items as $item) {
                    $feedItem = [];

                    $heading = $item->findElement(WebDriverBy::xpath('.//app-heading-tag/h1/a'));
                    $feedItem['title'] = $heading->getText();
                    $feedItem['uri'] = 'https://www.gulp.de' . $heading->getAttribute('href');

                    $info = $item->findElement(WebDriverBy::tagName('app-icon-info-list'));

                    $logo = $this->getLogo($item);
                    if ($logo) {
                        $feedItem['enclosures'] = [$logo];
                    }

                    if (str_contains($info->getText(), 'Projektanbieter:')) {
                        $feedItem['author'] = $info->findElement(WebDriverBy::xpath('.//li/span[2]/span'))->getText();
                    } else {
                        // mostly "Direkt vom Auftraggeber" or "GULP Agentur"
                        $feedItem['author'] = $item->findElement(WebDriverBy::tagName('b'))->getText();
                    }

                    $feedItem['content'] = $item->findElement(WebDriverBy::xpath('.//p[@class="description"]'))->getText();

                    $timeAgo = $item->findElement(WebDriverBy::xpath('.//small[contains(@class, "time-ago")]'))->getText();
                    $feedItem['timestamp'] = $this->getTimestamp($timeAgo);

                    $this->items[] = $feedItem;
                }

                if (count($this->items) >= self::MAXITEMS) {
                    break;
                }

                try {
                    $this->clickNextPage();
                } catch (NoSuchElementException $e) {
                    // No "next page" link -- presumably the last result page.
                    // Keep what was collected; only fail when nothing was found at all.
                    if (count($this->items) === 0) {
                        throw $e;
                    }
                    break;
                }
            }
        } finally {
            $this->cleanUp();
        }
    }
}

View File

@@ -28,6 +28,8 @@ class GameBananaBridge extends BridgeAbstract
return 'https://images.gamebanana.com/static/img/favicon/favicon.ico';
}
private $title;
public function collectData()
{
$url = 'https://api.gamebanana.com/Core/List/New?itemtype=Mod&page=1&gameid=' . $this->getInput('gid');
@@ -38,7 +40,7 @@ class GameBananaBridge extends BridgeAbstract
$json_list = json_decode($api_response, true); // Get first page mod list
$url = 'https://api.gamebanana.com/Core/Item/Data?itemtype[]=Game&fields[]=name&itemid[]=' . $this->getInput('gid');
$fields = 'name,Owner().name,text,screenshots,Files().aFiles(),date,Url().sProfileUrl(),udate';
$fields = 'name,Owner().name,text,screenshots,Files().aFiles(),date,Url().sProfileUrl(),udate,Updates().aLatestUpdates(),Category().name,RootCategory().name';
foreach ($json_list as $element) { // Build api request to minimize API calls
$mid = $element[1];
$url .= '&itemtype[]=Mod&fields[]=' . $fields . '&itemid[]=' . $mid;
@@ -50,11 +52,18 @@ class GameBananaBridge extends BridgeAbstract
array_shift($json_list); // Take title from API request and remove from json
foreach ($json_list as $element) {
// Trashed mod IDs are still picked up and return null; skip
if ($element[0] == null) {
continue;
}
$item = [];
$item['uri'] = $element[6];
$item['comments'] = $item['uri'] . '#PostsListModule';
$item['title'] = $element[0];
$item['author'] = $element[1];
$item['categories'][] = $element[9];
$item['categories'][] = $element[10];
$item['timestamp'] = $element[5];
if ($this->getInput('updates')) {
@@ -72,6 +81,22 @@ class GameBananaBridge extends BridgeAbstract
foreach ($img_list as $img_element) {
$item['content'] .= '<img src="https://images.gamebanana.com/img/ss/mods/' . $img_element['_sFile'] . '"/>';
}
// Get updates from element[8], if applicable
if ($this->getInput('updates') && count($element[8]) > 0) {
$update = $element[8][0];
$item['content'] .= '<br><strong>Update:</strong> ' . $update['_sTitle'];
if ($update['_sText'] != '') {
$item['content'] .= '<br>' . $update['_sText'];
}
foreach ($update['_aChangeLog'] as $change) {
if ($change['cat'] == '') {
$change['cat'] = 'Change';
}
$item['content'] .= '<br><em>' . $change['cat'] . '</em>: ' . $change['text'];
}
$item['content'] .= '<br><hr>';
}
$item['content'] .= '<br>' . $element[2];
$item['uid'] = $item['uri'] . $item['title'] . $item['timestamp'];

View File

@@ -21,6 +21,10 @@ class GatesNotesBridge extends BridgeAbstract
$rawContent = getContents($apiUrl);
$cleanedContent = trim($rawContent, '"');
$cleanedContent = str_replace([
'<string xmlns="http://schemas.microsoft.com/2003/10/Serialization/">',
'</string>'
], '', $cleanedContent);
$cleanedContent = str_replace('\r\n', "\n", $cleanedContent);
$cleanedContent = stripslashes($cleanedContent);

View File

@@ -33,7 +33,7 @@ class GelbooruBridge extends BridgeAbstract
return $this->getURI()
. 'index.php?&page=dapi&s=post&q=index&json=1&pid=' . $this->getInput('p')
. '&limit=' . $this->getInput('l')
. '&tags=' . urlencode($this->getInput('t'));
. '&tags=' . urlencode($this->getInput('t') ?? '');
}
/*
@@ -76,18 +76,16 @@ class GelbooruBridge extends BridgeAbstract
public function collectData()
{
$content = getContents($this->getFullURI());
// $content is empty string
$url = $this->getFullURI();
$content = getContents($url);
// Most other Gelbooru-based boorus put their content in the root of
// the JSON. This check is here for Bridges that inherit from this one
$posts = json_decode($content);
if (isset($posts->post)) {
$posts = $posts->post;
if ($content === '') {
return;
}
if (is_null($posts)) {
returnServerError('No posts found.');
$posts = Json::decode($content, false);
if (isset($posts->post)) {
$posts = $posts->post;
}
foreach ($posts as $post) {

View File

@@ -2,11 +2,11 @@
class GenshinImpactBridge extends BridgeAbstract
{
const MAINTAINER = 'corenting';
const NAME = 'Genshin Impact';
const URI = 'https://genshin.mihoyo.com/en/news';
const CACHE_TIMEOUT = 7200; // 2h
const DESCRIPTION = 'News from the Genshin Impact website';
const URI = 'https://genshin.hoyoverse.com/en/news';
const CACHE_TIMEOUT = 18000; // 5h
const DESCRIPTION = 'Latest news from the Genshin Impact website';
const MAINTAINER = 'Miicat_47';
const PARAMETERS = [
[
'category' => [
@@ -25,37 +25,31 @@ class GenshinImpactBridge extends BridgeAbstract
public function collectData()
{
$category = $this->getInput('category');
$url = 'https://genshin.mihoyo.com/content/yuanshen/getContentList';
$url = $url . '?pageSize=5&pageNum=1&channelId=' . $category;
$url = 'https://api-os-takumi-static.hoyoverse.com/content_v2_user/app/a1b1f9d3315447cc/getContentList?iAppId=32&iChanId=395&iPageSize=5&iPage=1&sLangKey=en-us';
$api_response = getContents($url);
$json_list = json_decode($api_response, true);
$json_list = Json::decode($api_response);
foreach ($json_list['data']['list'] as $json_item) {
$article_url = 'https://genshin.mihoyo.com/content/yuanshen/getContent';
$article_url = $article_url . '?contentId=' . $json_item['contentId'];
$article_res = getContents($article_url);
$article_json = json_decode($article_res, true);
$article_time = $article_json['data']['start_time'];
$timezone = 'Asia/Shanghai';
$article_timestamp = new DateTime($article_time, new DateTimeZone($timezone));
$article_html = str_get_html($json_item['sContent']);
// Check if article contains a embed YouTube video
$exp_youtube = '#https://[w\.]+youtube\.com/embed/([\w]+)#m';
if (preg_match($exp_youtube, $article_html, $matches)) {
// Replace the YouTube embed with a YouTube link
$yt_embed = $article_html->find('div[class="ttr-video-frame"]', 0);
$yt_link = sprintf('<a href="https://youtube.com/watch?v=%1$s">https://youtube.com/watch?v=%1$s</a>', $matches[1]);
$article_html = str_replace($yt_embed, $yt_link, $article_html);
}
$item = [];
$item['title'] = $article_json['data']['title'];
$item['timestamp'] = $article_timestamp->format('U');
$item['content'] = $article_json['data']['content'];
$item['uri'] = $this->getArticleUri($json_item);
$item['id'] = $json_item['contentId'];
$item['title'] = $json_item['sTitle'];
$item['timestamp'] = $json_item['dtStartTime'];
$item['content'] = $article_html;
$item['uri'] = 'https://genshin.hoyoverse.com/en/news/detail/' . $json_item['iInfoId'];
$item['id'] = $json_item['iInfoId'];
// Picture
foreach ($article_json['data']['ext'] as $ext) {
if ($ext['arrtName'] == 'banner' && count($ext['value']) == 1) {
$item['enclosures'] = [$ext['value'][0]['url']];
break;
}
}
$json_ext = Json::decode($json_item['sExt']);
$item['enclosures'] = [$json_ext['banner'][0]['url']];
$this->items[] = $item;
}
@@ -63,11 +57,6 @@ class GenshinImpactBridge extends BridgeAbstract
public function getIcon()
{
return 'https://genshin.mihoyo.com/favicon.ico';
}
private function getArticleUri($json_item)
{
return 'https://genshin.mihoyo.com/en/news/detail/' . $json_item['contentId'];
return 'https://genshin.hoyoverse.com/favicon.ico';
}
}

View File

@@ -586,16 +586,18 @@ class GithubTrendingBridge extends BridgeAbstract
'Monthly' => 'monthly',
],
'defaultValue' => 'today'
],
'spokenLanguage' => [
'name' => 'Spoken Language Code',
'type' => 'text',
'exampleValue' => 'en',
]
]
];
public function collectData()
{
$params = ['since' => urlencode($this->getInput('date_range'))];
$url = self::URI . '/' . $this->getInput('language') . '?' . http_build_query($params);
$url = $this->constructUrl();
$html = getSimpleHTMLDOM($url);
$this->items = [];
@@ -630,4 +632,32 @@ class GithubTrendingBridge extends BridgeAbstract
return parent::getName();
}
/**
 * Assembles the trending page URL from the 'language', 'date_range' and
 * 'spokenLanguage' inputs; empty inputs are left out of the URL.
 */
private function constructUrl()
{
    $language = $this->getInput('language');
    $dateRange = $this->getInput('date_range');
    $spokenLanguage = $this->getInput('spokenLanguage');

    // Optional path segment selecting the programming language.
    $path = empty($language) ? '' : '/' . $language;

    $query = [];
    if (!empty($dateRange)) {
        $query['since'] = $dateRange;
    }
    if (!empty($spokenLanguage)) {
        $query['spoken_language_code'] = trim($spokenLanguage);
    }

    $queryString = empty($query) ? '' : '?' . http_build_query($query);

    return self::URI . $path . $queryString;
}
}

View File

@@ -41,8 +41,7 @@ class GlowficBridge extends BridgeAbstract
$first_page = 1;
}
for ($page_offset = $first_page; $page_offset <= $metadata['Last-Page']; $page_offset++) {
$jsonContents = getContents($url . '/replies?page=' . $page_offset) or
returnClientError('Could not retrieve replies for page ' . $page_offset . '.');
$jsonContents = getContents($url . '/replies?page=' . $page_offset);
$replies = json_decode($jsonContents);
foreach ($replies as $reply) {
$item = [];
@@ -75,8 +74,9 @@ class GlowficBridge extends BridgeAbstract
private function getPost()
{
$url = $this->getAPIURI();
$jsonPost = getContents($url) or returnClientError('Could not retrieve post metadata.');
$jsonPost = getContents($url);
$post = json_decode($jsonPost);
return $post;
}

View File

@@ -106,10 +106,33 @@ class GolemBridge extends FeedExpander
$article = $page->find('article', 0);
//built youtube iframes
foreach ($article->find('.embedcontent') as &$embedcontent) {
$ytscript = $embedcontent->find('script', 0);
if (preg_match('/(www.youtube.com.*?)\"/', $ytscript->innertext, $link)) {
$link = 'https://' . str_replace('\\', '', $link[1]);
$embedcontent->innertext .= <<<EOT
<iframe width="560" height="315" src="$link" title="YouTube video player" frameborder="0"
allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share"
referrerpolicy="strict-origin-when-cross-origin" allowfullscreen></iframe>';
EOT;
}
}
//built golem videos
foreach ($article->find('.gvideofig') as &$embedcontent) {
if (preg_match('/gvideo_(.*)/', $embedcontent->id, $videoid)) {
$embedcontent->innertext .= <<<EOT
<video class="rmp-object-fit-contain rmp-video" x-webkit-airplay="allow" controlslist="nodownload" tabindex="-1"
preload="metadata" src="https://video.golem.de/download/$videoid[1]"></video>
EOT;
}
}
// delete known bad elements
foreach (
$article->find('div[id*="adtile"], #job-market, #seminars, iframe,
div.gbox_affiliate, div.toc, .embedcontent, script') as $bad
div.gbox_affiliate, div.toc') as $bad
) {
$bad->remove();
}
@@ -129,7 +152,7 @@ class GolemBridge extends FeedExpander
$img->src = $img->getAttribute('data-src-full');
}
foreach ($content->find('p, h1, h2, h3, img[src*="."]') as $element) {
foreach ($content->find('p, h1, h2, h3, img[src*="."], iframe, video') as $element) {
$item .= $element;
}

125
bridges/GovTrackBridge.php Normal file
View File

@@ -0,0 +1,125 @@
<?php
/**
 * Provides GovTrack.us blog posts ('posts' feed) or tracked legislative
 * events (all other feeds, loaded through the site's event endpoint).
 */
class GovTrackBridge extends BridgeAbstract
{
    const NAME = 'GovTrack';
    const MAINTAINER = 'phantop';
    const URI = 'https://www.govtrack.us/';
    const DESCRIPTION = 'Returns posts and bills from GovTrack.us';
    const PARAMETERS = [[
        'feed' => [
            'name' => 'Feed to track',
            'type' => 'list',
            'defaultValue' => 'posts',
            'values' => [
                'All Legislative Activity' => 'bill-activity',
                'Bill Summaries' => 'bill-summaries',
                'Legislation Coming Up' => 'coming-up',
                'Major Legislative Activity' => 'major-bill-activity',
                'New Bills and Resolutions' => 'introduced-bills',
                'New Laws' => 'enacted-bills',
                'Posts from Us' => 'posts'
            ]
        ],
        'limit' => self::LIMIT
    ]];

    /**
     * Collects posts from the posts listing, or delegates to collectEvent()
     * for every other feed.
     */
    public function collectData()
    {
        $html = getSimpleHTMLDOMCached($this->getURI());
        if ($this->getInput('feed') != 'posts') {
            $this->collectEvent($html);
            return;
        }

        $html = defaultLinkTo($html, parent::getURI());
        $limit = (int) ($this->getInput('limit') ?? 10);
        $collected = 0;

        foreach ($html->find('section') as $element) {
            // Stop only after $limit posts were emitted. The previous
            // pre-decrement check bailed out one post early. A limit of
            // zero or less keeps meaning "no limit".
            if ($limit > 0 && $collected >= $limit) {
                break;
            }

            $info = explode(' ', $element->find('p', 0)->innertext);
            $item = [
                'categories' => [implode(' ', array_slice($info, 4))],
                'timestamp' => strtotime(implode(' ', array_slice($info, 0, 3))),
                'title' => $element->find('a', 0)->innertext,
                'uri' => $element->find('a', 0)->href,
            ];

            // Separate variable: the listing document must not be shadowed by the post page.
            $postHtml = getSimpleHTMLDOMCached($item['uri']);
            $postHtml = defaultLinkTo($postHtml, parent::getURI());
            $content = $postHtml->find('#content .col-md', 1);
            $info = explode(' by ', $content->find('p', 0)->plaintext);
            // The first child carries the byline parsed above; drop it from the content.
            $content->removeChild($content->firstChild());

            $item['author'] = implode(' ', array_slice($info, 1));
            $item['content'] = $content->innertext;
            $this->items[] = $item;
            $collected++;
        }
    }

    /**
     * Loads tracked events by posting to 'events/_load_events' with the CSRF
     * token and feed identifier scraped from the given page.
     *
     * @param object $html Parsed events page for the selected feed.
     * @throws \Exception when the token or the feed identifier cannot be found
     */
    private function collectEvent($html)
    {
        $token = [];
        if (!preg_match('/"csrfmiddlewaretoken" value="(.*)"/', $html, $token)) {
            throw new \Exception('Unable to find the CSRF token');
        }
        $header = [
            "cookie: csrftoken=$token[1]",
            "x-csrftoken: $token[1]",
            'referer: ' . parent::getURI(),
        ];

        $feed = [];
        if (!preg_match('/var selected_feed = "(.*)";/', $html, $feed)) {
            throw new \Exception('Unable to find the selected feed');
        }
        $post = [
            'count' => $this->getInput('limit') ?? 20,
            'feed' => $feed[1]
        ];
        $opt = [ CURLOPT_POSTFIELDS => $post ];

        $html = getContents(parent::getURI() . 'events/_load_events', $header, $opt);
        $html = defaultLinkTo(str_get_html($html), parent::getURI());

        foreach ($html->find('.tracked_event') as $event) {
            $bill = $event->find('.event_title a, .event_body a', 0);
            if (!$bill) {
                // Without a link there is neither a title nor a uri to emit.
                continue;
            }
            $date = explode(' ', $event->find('.event_date', 0)->plaintext);
            $sponsor = [];
            preg_match('/Sponsor:(.*)\n/', $event->plaintext, $sponsor);

            $item = [
                'author' => $sponsor[1] ?? '',
                'content' => $event->find('td', 1)->innertext,
                'enclosures' => [],
                'timestamp' => strtotime(implode(' ', array_slice($date, 2))),
                'title' => explode(': ', $bill->innertext)[0],
                'uri' => $bill->href,
            ];

            $image = $event->find('img', 0);
            if ($image) {
                $item['enclosures'][] = $image->src;
            }

            foreach ($event->find('.event_title, .event_type span') as $tag) {
                if (!$tag->find('a', 0)) {
                    $item['categories'][] = $tag->plaintext;
                }
            }

            $this->items[] = $item;
        }
    }

    /**
     * Appends the display key of the selected feed to the bridge name.
     */
    public function getName()
    {
        $name = parent::getName();
        if ($this->getInput('feed') != null) {
            $name .= ' - ' . $this->getKey('feed');
        }
        return $name;
    }

    /**
     * Returns the page for the selected feed: the posts listing for 'posts',
     * 'events/<feed>' otherwise.
     */
    public function getURI()
    {
        if ($this->getInput('feed') != 'posts') {
            $url = parent::getURI() . 'events/' . $this->getInput('feed');
        } else {
            $url = parent::getURI() . $this->getInput('feed');
        }
        return $url;
    }
}

View File

@@ -21,8 +21,6 @@ class HackerNewsUserThreadsBridge extends BridgeAbstract
{
$url = 'https://news.ycombinator.com/threads?id=' . $this->getInput('user');
$html = getSimpleHTMLDOM($url);
Debug::log('queried ' . $url);
Debug::log('found ' . $html);
$item = [];
$articles = $html->find('tr[class*="comtr"]');

View File

@@ -1,65 +0,0 @@
<?php
/**
 * Expands the hardware.info RSS feed with the article body scraped from
 * each item's page.
 */
class HardwareInfoBridge extends FeedExpander
{
    const NAME = 'Hardware Info Bridge';
    const URI = 'https://nl.hardware.info/';
    const DESCRIPTION = 'Tech news from hardware.info (Dutch)';
    const MAINTAINER = 't0stiman';

    public function collectData()
    {
        $this->collectExpandableDatas('https://nl.hardware.info/updates/all.rss', 10);
    }

    /**
     * Replaces the item's content with the cleaned-up article markup.
     *
     * @param array $item Feed item as parsed from the RSS feed; must contain 'uri'.
     * @return array The item, unchanged when the page has no article container.
     */
    protected function parseItem(array $item)
    {
        $itemUrl = $item['uri'];
        $articlePage = getSimpleHTMLDOMCached($itemUrl);
        $article = $articlePage->find('div.article__content', 0);

        if (!$article) {
            // Page layout differs from what is expected: keep the plain feed item.
            return $item;
        }

        //everything under the social bar is not part of the article, remove it
        $reachedEndOfArticle = false;
        foreach ($article->find('*') as $child) {
            if (
                !$reachedEndOfArticle && isset($child->attr['class'])
                && $child->attr['class'] == 'article__content__social-bar'
            ) {
                $reachedEndOfArticle = true;
            }

            if ($reachedEndOfArticle) {
                $child->outertext = '';
            }
        }

        //get rid of some more elements we don't need
        $to_remove_selectors = [
            'script',
            'div.incontent',
            'div.article__content__social-bar',
            'div#revealNewsTip',
            'div.article__previous_next'
        ];
        foreach ($to_remove_selectors as $selector) {
            foreach ($article->find($selector) as $found) {
                $found->outertext = '';
            }
        }

        // convert iframes to links. meant for embedded YouTube videos.
        foreach ($article->find('iframe') as $found) {
            $iframeUrl = $found->getAttribute('src');
            if ($iframeUrl) {
                $found->outertext = '<a href="' . $iframeUrl . '">' . $iframeUrl . '</a>';
            }
        }

        $item['content'] = $article;
        return $item;
    }
}

View File

@@ -0,0 +1,88 @@
<?php
/**
 * Scrapes the "The Latest" stream of hbr.org into feed items.
 */
class HarvardBusinessReviewBridge extends BridgeAbstract
{
    const NAME = 'Harvard Business Review - The Latest';
    const MAINTAINER = 'yourname';
    const URI = 'https://hbr.org';
    const DESCRIPTION = 'Returns the latest articles from Harvard Business Review';
    const CACHE_TIMEOUT = 3600; // 60min
    const PARAMETERS = [[
        'postcount' => [
            'name' => 'Limit',
            'type' => 'number',
            'required' => true,
            'title' => 'Maximum number of items to return',
            'defaultValue' => 6, //More requires clicking button "Load more"
        ],
    ]];

    /**
     * Collects stream entries until 'postcount' items were gathered.
     *
     * Ad containers, sponsored entries and entries without a link or title
     * are skipped. A missing author, timestamp or summary is replaced with a
     * default value.
     */
    public function collectData()
    {
        $url = self::URI . '/the-latest';
        $html = getSimpleHTMLDOM($url);

        foreach ($html->find('li.stream-entry') as $data) {
            try {
                // Skip entries containing the text 'stream-ad-container'
                if ($data->innertext !== null && strpos($data->innertext, 'stream-ad-container') !== false) {
                    continue;
                }

                // Skip entries with class 'sponsored'
                if ($data->hasClass('sponsored')) {
                    continue;
                }

                $linkElement = $data->find('a', 0);
                $titleElement = $data->find('h3.hed a', 0);
                $authorElement = $data->find('ul.byline-list li', 0);
                $timestampElement = $data->find('li.pubdate time', 0);
                $contentElement = $data->find('div.dek', 0);

                // An entry is only usable with both a link and a title.
                if (!$linkElement || !$titleElement) {
                    continue;
                }

                $item = [];

                $href = (string) $linkElement->getAttribute('href');
                // Prefix the site root for relative links only; an absolute
                // href would otherwise end up with the host doubled.
                $item['uri'] = preg_match('#^https?://#i', $href) ? $href : self::URI . $href;

                $item['title'] = trim($titleElement->plaintext);

                // Default value if author is missing
                $item['author'] = $authorElement ? trim($authorElement->plaintext) : 'Unknown';

                // strtotime() returns false for unparsable text; treat that
                // like a missing timestamp and default to the current time.
                $timestamp = $timestampElement ? strtotime($timestampElement->plaintext) : false;
                $item['timestamp'] = $timestamp !== false ? $timestamp : time();

                // Default to empty string if content is missing
                $item['content'] = $contentElement ? trim($contentElement->plaintext) : '';

                $item['uid'] = hash('sha256', $item['title']);

                $this->items[] = $item;

                if (count($this->items) >= $this->getInput('postcount')) {
                    break;
                }
            } catch (Exception $e) {
                // Deliberate best-effort: one broken entry must not abort the whole feed.
                continue;
            }
        }
    }
}

View File

@@ -0,0 +1,71 @@
<?php
/**
 * Feed of the Harvard Health Blog with full article bodies.
 *
 * The listing page supplies link, title and date; each article page is then
 * fetched (through the cache) to extract the body.
 */
class HarvardHealthBlogBridge extends BridgeAbstract
{
    const NAME = 'Harvard Health Blog';
    const URI = 'https://www.health.harvard.edu/blog';
    const DESCRIPTION = 'Retrieve articles from health.harvard.edu';
    const MAINTAINER = 'tillcash';
    const MAX_ARTICLES = 10;
    const PARAMETERS = [
        [
            'image' => [
                'name' => 'Article Image',
                'type' => 'checkbox',
                'defaultValue' => 'checked',
            ],
        ],
    ];

    /**
     * Collect at most MAX_ARTICLES items from the blog listing page.
     */
    public function collectData()
    {
        $dom = getSimpleHTMLDOM(self::URI);
        $count = 0;

        foreach ($dom->find('div[class="mb-16 md:flex"]') as $element) {
            if ($count >= self::MAX_ARTICLES) {
                break;
            }

            $data = $element->find('a[class="hover:text-red transition-colors duration-200"]', 0);
            if (!$data) {
                continue;
            }

            $url = $data->href;
            $item = [
                'content' => $this->constructContent($url),
                'title' => $data->plaintext,
                'uid' => $url,
                'uri' => $url,
            ];

            // A teaser without a <time> element simply gets no timestamp
            // instead of triggering a null dereference.
            $time = $element->find('time', 0);
            if ($time) {
                $item['timestamp'] = $time->datetime;
            }

            $this->items[] = $item;
            $count++;
        }
    }

    /**
     * Fetch an article page and return the cleaned HTML of its body.
     *
     * @param string $url Article URL taken from the listing page
     * @return string Inner HTML of the article body, or 'Content Not Found'
     *                when the body container is missing
     */
    private function constructContent($url)
    {
        $dom = getSimpleHTMLDOMCached($url);
        $article = $dom->find('div[class*="content-repository-content"]', 0);
        if (!$article) {
            return 'Content Not Found';
        }

        // remove article image
        // NOTE(review): this assumes the first <p> of the body holds the image - confirm.
        if (!$this->getInput('image')) {
            $image = $article->find('p', 0);
            if ($image) {
                $image->remove();
            }
        }

        // remove ads
        foreach ($article->find('.inline-ad') as $ad) {
            $ad->outertext = '';
        }

        return $article->innertext;
    }
}

View File

@@ -160,9 +160,17 @@ class HeiseBridge extends FeedExpander
$article = defaultLinkTo($article, $item['uri']);
// remove unwanted stuff
foreach ($article->find('figure.branding, a-ad, div.ho-text, a-img, .opt-in__content-container, .a-toc__list, a-collapse') as $element) {
foreach (
$article->find('figure.branding, figure.a-inline-image, a-ad, div.ho-text, a-img,
.a-toc__list, a-collapse, .opt-in__description, .opt-in__footnote') as $element
) {
$element->remove();
}
foreach ($article->find('img') as $element) {
if (str_contains($element->alt, 'l+f')) {
$element->remove();
}
}
// reload html, as remove() is buggy
$article = str_get_html($article->outertext);
@@ -179,7 +187,31 @@ class HeiseBridge extends FeedExpander
}
}
$categories = $article->find('.article-footer__topics ul.topics li.topics__item');
//fix for embbedded youtube-videos
$oldlink = '';
foreach ($article->find('div.video__yt-container') as &$ytvideo) {
if (preg_match('/www.youtube.*?\"/', $ytvideo->innertext, $link) && $link[0] != $oldlink) {
//save link to prevent duplicates
$oldlink = $link[0];
$ytiframe = <<<EOT
<iframe width="560" height="315" src="https://$link[0] title="YouTube video player" frameborder="0"
allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share"
referrerpolicy="strict-origin-when-cross-origin" allowfullscreen></iframe>
EOT;
//check if video is in header or article for correct possitioning
if (strpos($header->innertext, $link[0])) {
$item['content'] .= $ytiframe;
} else {
$ytvideo->innertext .= $ytiframe;
$reloadneeded = 1;
}
}
}
if (isset($reloadneeded)) {
$article = str_get_html($article->outertext);
}
$categories = $article->find('.article-footer__topics ul.topics li.topics__item a-topic a');
foreach ($categories as $category) {
$item['categories'][] = trim($category->plaintext);
}
@@ -187,7 +219,7 @@ class HeiseBridge extends FeedExpander
$content = $article->find('.article-content', 0);
if ($content) {
$contentElements = $content->find(
'p, h3, ul, table, pre, noscript img, a-bilderstrecke h2, a-bilderstrecke figure, a-bilderstrecke figcaption'
'p, h3, ul, ol, table, pre, noscript img, a-bilderstrecke h2, a-bilderstrecke figure, a-bilderstrecke figcaption, noscript iframe'
);
$item['content'] .= implode('', $contentElements);
}

View File

@@ -0,0 +1,97 @@
<?php
/**
 * Expands the hindutamil.in FeedBurner feeds with the full article text.
 *
 * The feed for the selected topic is read, then every article page is fetched
 * (through the cache) to replace the item content and, when available, the
 * timestamp.
 */
class HinduTamilBridge extends FeedExpander
{
    const NAME = 'HinduTamil';
    const URI = 'https://www.hindutamil.in';
    const FEED_BASE_URL = 'https://feeds.feedburner.com/Hindu_Tamil_';
    const DESCRIPTION = 'Retrieve full articles from hindutamil.in feeds';
    const MAINTAINER = 'tillcash';
    const PARAMETERS = [
        [
            'topic' => [
                'name' => 'topic',
                'type' => 'list',
                'defaultValue' => 'crime',
                'values' => [
                    'Astrology' => 'astrology',
                    'Blogs' => 'blogs',
                    'Business' => 'business',
                    'Cartoon' => 'cartoon',
                    'Cinema' => 'cinema',
                    'Crime' => 'crime',
                    'Discussion' => 'discussion',
                    'Education' => 'education',
                    'Environment' => 'environment',
                    'India' => 'india',
                    'Lifestyle' => 'life-style',
                    'Literature' => 'literature',
                    'Opinion' => 'opinion',
                    'Reporters' => 'reporters-page',
                    'Socialmedia' => 'social-media',
                    'Spirituals' => 'spirituals',
                    'Sports' => 'sports',
                    'Supplements' => 'supplements',
                    'Tamilnadu' => 'tamilnadu',
                    'Technology' => 'technology',
                    'Tourism' => 'tourism',
                    'World' => 'world',
                ],
            ],
            'limit' => [
                'name' => 'limit (max 100)',
                'type' => 'number',
                'defaultValue' => 10,
            ],
        ],
    ];

    /**
     * Bridge name, suffixed with the label of the selected topic when one is set.
     */
    public function getName()
    {
        $topicLabel = $this->getKey('topic');
        if ($topicLabel) {
            return self::NAME . ' - ' . $topicLabel;
        }

        return self::NAME;
    }

    /**
     * Read the feed of the selected topic, capped at 100 items.
     */
    public function collectData()
    {
        $feedUrl = self::FEED_BASE_URL . $this->getInput('topic');
        $maxItems = min(100, $this->getInput('limit'));

        $this->collectExpandableDatas($feedUrl, $maxItems);
    }

    /**
     * Enrich a feed item with data scraped from its article page.
     *
     * The item is returned untouched when the page has no '#pgContentPrint'
     * container.
     */
    protected function parseItem($item)
    {
        $page = getSimpleHTMLDOMCached($item['uri']);
        $body = $page->find('#pgContentPrint', 0);

        if ($body !== null) {
            // Prefer the publication time from the page metadata over the feed's.
            $item['timestamp'] = $this->getTimestamp($page) ?? $item['timestamp'];
            $item['content'] = $this->getImage($page) . $this->cleanContent($body);
        }

        return $item;
    }

    /**
     * Blank out centered divs, scripts and ad placements, then return the
     * remaining inner HTML.
     */
    private function cleanContent($content): string
    {
        $unwanted = $content->find('div[align="center"], script, .adsplacement');
        foreach ($unwanted as $node) {
            $node->outertext = '';
        }

        return $content->innertext;
    }

    /**
     * Value of the 'article:published_time' meta tag, or null when it is absent.
     */
    private function getTimestamp($dom): ?string
    {
        $meta = $dom->find('meta[property="article:published_time"]', 0);
        if (!$meta) {
            return null;
        }

        return $meta->getAttribute('content');
    }

    /**
     * Paragraph embedding the 'og:image' picture, or an empty string when the
     * page declares none.
     */
    private function getImage($dom): string
    {
        $meta = $dom->find('meta[property="og:image"]', 0);
        if (!$meta) {
            return '';
        }

        return sprintf('<p><img src="%s"></p>', $meta->getAttribute('content'));
    }
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,68 @@
<?php
/**
 * Feed of the bundles currently offered on Humble Bundle.
 *
 * The landing page of the selected category embeds its data as JSON inside
 * the '#landingPage-json-data' element; every product found there becomes one
 * feed item.
 */
class HumbleBundleBridge extends BridgeAbstract
{
    const NAME = 'Humble Bundle';
    const MAINTAINER = 'phantop';
    const URI = 'https://humblebundle.com/';
    const DESCRIPTION = 'Returns bundles from Humble Bundle.';
    const PARAMETERS = [[
        'type' => [
            'name' => 'Bundle type',
            'type' => 'list',
            'defaultValue' => 'bundles',
            'values' => [
                'All' => 'bundles',
                'Books' => 'books',
                'Games' => 'games',
                'Software' => 'software',
            ]
        ]
    ]];

    /**
     * Extract the embedded landing-page JSON and turn its products into items.
     *
     * @throws \Exception when the JSON element is missing or cannot be decoded
     */
    public function collectData()
    {
        $page = getSimpleHTMLDOMCached($this->getURI());

        // Fail with a clear message instead of an opaque TypeError further
        // down when the page layout changes.
        $dataNode = $page->find('#landingPage-json-data', 0);
        if (!$dataNode) {
            throw new \Exception('Humble Bundle: element #landingPage-json-data not found');
        }

        $decoded = json_decode(html_entity_decode($dataNode->innertext), true);
        if (!is_array($decoded) || !isset($decoded['data']) || !is_array($decoded['data'])) {
            throw new \Exception('Humble Bundle: unable to decode landing page JSON data');
        }
        $json = $decoded['data'];

        // 'bundles' means every category; anything else selects a single one.
        $selected = $this->getInput('type');
        $types = $selected === 'bundles' ? ['books', 'games', 'software'] : [$selected];

        $products = [];
        foreach ($types as $type) {
            // A category that is absent or empty contributes nothing.
            $typeProducts = $json[$type]['mosaic'][0]['products'] ?? [];
            if (is_array($typeProducts)) {
                $products = array_merge($products, $typeProducts);
            }
        }

        // Avoids a doubled slash when both the base URI and product_url carry one.
        $baseUri = rtrim(parent::getURI(), '/');

        foreach ($products as $element) {
            $item = [];
            $item['author'] = $element['author'];
            $item['timestamp'] = $element['start_date|datetime'];
            $item['title'] = $element['tile_short_name'];
            $item['uid'] = $element['machine_name'];
            $item['uri'] = $baseUri . '/' . ltrim($element['product_url'], '/');
            $item['content'] = $element['marketing_blurb'];
            $item['content'] .= '<br>' . $element['detailed_marketing_blurb'];

            // Category order: stamp, prefix of the tile name (text before ':'),
            // then the hover highlights.
            $highlights = $element['hover_highlights'] ?? [];
            $item['categories'] = is_array($highlights) ? $highlights : [];
            array_unshift($item['categories'], explode(':', $element['tile_name'])[0]);
            array_unshift($item['categories'], $element['tile_stamp']);

            // Drop missing images so no empty enclosure is emitted.
            $item['enclosures'] = array_values(array_filter([
                $element['tile_logo'] ?? null,
                $element['high_res_tile_image'] ?? null,
            ]));

            $this->items[] = $item;
        }
    }

    /**
     * Bridge name, suffixed with the selected bundle type when one is set.
     */
    public function getName()
    {
        $name = parent::getName();
        $name .= $this->getInput('type') ? ' - ' . $this->getInput('type') : '';
        return $name;
    }

    /**
     * Landing page URI of the selected bundle type (parent URI + type value).
     */
    public function getURI()
    {
        $uri = parent::getURI() . $this->getInput('type');
        return $uri;
    }
}

View File

@@ -0,0 +1,40 @@
<?php
/**
 * Feed of the news teasers listed on huntshowdown.com/news.
 *
 * Only the listing page is fetched; each '.col' teaser becomes one item.
 */
class HuntShowdownNewsBridge extends BridgeAbstract
{
    const NAME = 'Hunt Showdown News Bridge';
    const MAINTAINER = 'deffy92';
    const URI = 'https://www.huntshowdown.com';
    const DESCRIPTION = 'Returns the latest news from HuntShowdown.com/news';
    const BASE_URI = 'https://www.huntshowdown.com/';

    /**
     * Scrape the news listing and append one item per teaser to $this->items.
     */
    public function collectData()
    {
        $html = getSimpleHTMLDOM('https://www.huntshowdown.com/news/tagged/news');
        $articles = defaultLinkTo($html, self::URI)->find('.col');

        // Removing first element because it's a "load more" button
        array_shift($articles);

        foreach ($articles as $article) {
            $titleNode = $article->find('h3', 0);
            $linkNode = $article->find('a', 0);
            if (!$titleNode || !$linkNode) {
                // Not a news teaser (no heading or no link): nothing to publish.
                continue;
            }

            $article_title = $titleNode->plaintext;

            $summaryNode = $article->find('p', 0);
            $article_content = $summaryNode ? $summaryNode->plaintext : '';

            $coverNode = $article->find('img', 0);
            $cover_url = $coverNode ? $coverNode->src : '';

            $item = [];
            $item['uri'] = $linkNode->href;
            $item['title'] = $article_title;

            // If there is a cover, add it to the content
            if (!empty($cover_url)) {
                // Escape the title so quotes cannot break out of the alt
                // attribute; double_encode=false leaves existing entities alone.
                $alt = htmlspecialchars($article_title, ENT_QUOTES, 'UTF-8', false);
                $article_content = '<img src="' . $cover_url . '" alt="' . $alt . '"> <br/> <br/>'
                    . $article_content;

                // The enclosure must be the image URL itself, not the <img> markup.
                $item['enclosures'] = [$cover_url];
            }

            $item['content'] = $article_content;

            $dateNode = $article->find('span', 0);
            if ($dateNode) {
                $item['timestamp'] = $dateNode->plaintext;
            }

            $this->items[] = $item;
        }
    }
}

Some files were not shown because too many files have changed in this diff Show More