mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2025-08-18 14:22:38 +02:00
Compare commits
165 Commits
2023-09-24
...
2024-02-02
Author | SHA1 | Date | |
---|---|---|---|
|
ca87562cab | ||
|
b964dcd936 | ||
|
81be72ea04 | ||
|
8423c52606 | ||
|
d01c462ad5 | ||
|
b2c8475b2c | ||
|
c4fceab7b3 | ||
|
cfe3dcfe6d | ||
|
a15e578158 | ||
|
c3a968193e | ||
|
6938f06125 | ||
|
0e3a79fd78 | ||
|
e58c867a82 | ||
|
d08d13f2c8 | ||
|
9574c17ddc | ||
|
06b299e627 | ||
|
1262cc982c | ||
|
487c692e68 | ||
|
4986119f1f | ||
|
bd58266b80 | ||
|
4973aaadf6 | ||
|
feb2a686d7 | ||
|
b6909942c8 | ||
|
6ac976b92e | ||
|
72eea1bd3d | ||
|
fffe4663cb | ||
|
a865b1073a | ||
|
d960e0049d | ||
|
bb36eb9eb8 | ||
|
12a90e2074 | ||
|
6408123330 | ||
|
6eaf0eaa56 | ||
|
191e5b0493 | ||
|
d5175aebcc | ||
|
d9ac019550 | ||
|
080e29365a | ||
|
c7e8ddf486 | ||
|
0eb4f6b267 | ||
|
491cb50219 | ||
|
2e5d2a88f3 | ||
|
1fecc4cfc1 | ||
|
0c08f791ef | ||
|
0bf5dbbc0b | ||
|
3ce94409ab | ||
|
ea58c8d2bc | ||
|
55ffac5bae | ||
|
12395fcf2d | ||
|
0f6fa8034b | ||
|
e904de2dc9 | ||
|
ef378663aa | ||
|
fac1f5cd88 | ||
|
7dbe106582 | ||
|
2032ed18c4 | ||
|
f67d2eb88a | ||
|
5ab1924c4f | ||
|
c8178e1fc4 | ||
|
ad2d4c7b1b | ||
|
1938446385 | ||
|
c9074facfe | ||
|
9f163ab7c6 | ||
|
98dafb61ae | ||
|
ea2b4d7506 | ||
|
f40f997405 | ||
|
4c5cf89725 | ||
|
a81acbe464 | ||
|
4e40e032b0 | ||
|
98a94855dc | ||
|
0c6ffbf5a4 | ||
|
3944ae68cb | ||
|
b34fa2d278 | ||
|
c5f586497f | ||
|
c9c2944e7c | ||
|
0116dde275 | ||
|
d4ae55733b | ||
|
4e1fa946b4 | ||
|
d127bf6e00 | ||
|
38e9c396cf | ||
|
0c4b498d4f | ||
|
d157816e07 | ||
|
f01729c86f | ||
|
0b67544f86 | ||
|
a3b064f4ee | ||
|
4a398a5b14 | ||
|
3ef0226a08 | ||
|
c3d9383523 | ||
|
deb9a7269e | ||
|
f3df283c4d | ||
|
206edaedf5 | ||
|
44ff2f2cf8 | ||
|
ccc20849ff | ||
|
609eed1791 | ||
|
b037d1b4d1 | ||
|
2b741b1c1b | ||
|
ef711cb30b | ||
|
4919c53c10 | ||
|
b347a9268a | ||
|
e76b0601b3 | ||
|
57b61c8787 | ||
|
7a7fa876d2 | ||
|
a6310cff1a | ||
|
84b5ffcc7c | ||
|
8d0ddb579f | ||
|
1dabd10e25 | ||
|
cee25d862d | ||
|
d4e4c3e89a | ||
|
f134808a26 | ||
|
a6a4502209 | ||
|
4722201281 | ||
|
4f7451895b | ||
|
8ff39f64f7 | ||
|
658391263e | ||
|
9056106c2d | ||
|
7533ef12e3 | ||
|
a41bb088f8 | ||
|
8203196145 | ||
|
563c2a345b | ||
|
ef5bd83bd0 | ||
|
408c2e5e91 | ||
|
f7f3ca0126 | ||
|
611fabe46c | ||
|
2aa52aa99a | ||
|
cf9558648e | ||
|
daef240cd2 | ||
|
5f37c72be0 | ||
|
fd52b9b9a4 | ||
|
920d00480d | ||
|
49d9dafaec | ||
|
2880524dfc | ||
|
e379019db2 | ||
|
44fb2c98bc | ||
|
382648fc22 | ||
|
9bda9e246a | ||
|
6634291c67 | ||
|
e55a88fb8e | ||
|
6a72c56cdd | ||
|
d21f8cebf6 | ||
|
7e183915a9 | ||
|
145bd10f4c | ||
|
b6a9baff94 | ||
|
143f90da60 | ||
|
47f52b5912 | ||
|
f97a3fa4d9 | ||
|
5f777d4126 | ||
|
e376805249 | ||
|
1cbe1a6f98 | ||
|
59dd49671d | ||
|
64582a64f1 | ||
|
547af0d0d2 | ||
|
69da0dd583 | ||
|
41df17bc46 | ||
|
0c92cf32d4 | ||
|
7273a05f02 | ||
|
d822d666c7 | ||
|
6cf9dfb7c9 | ||
|
3557e5ffd4 | ||
|
2172df9fa2 | ||
|
b9ec6a0eb4 | ||
|
0de5180ded | ||
|
f9ec88fb45 | ||
|
c04c0a5614 | ||
|
ae53adefad | ||
|
f421c45b21 | ||
|
cd30c25b08 | ||
|
e1b911fc1f | ||
|
09f3c1532a |
6
.github/.gitignore
vendored
Normal file
6
.github/.gitignore
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
# Visual Studio Code
|
||||
.vscode/*
|
||||
|
||||
# Generated files
|
||||
comment*.md
|
||||
comment*.txt
|
263
.github/prtester.py
vendored
263
.github/prtester.py
vendored
@@ -1,113 +1,184 @@
|
||||
import argparse
|
||||
import requests
|
||||
import itertools
|
||||
import re
|
||||
from bs4 import BeautifulSoup
|
||||
from datetime import datetime
|
||||
from typing import Iterable
|
||||
import os.path
|
||||
|
||||
# This script is specifically written to be used in automation for https://github.com/RSS-Bridge/rss-bridge
|
||||
#
|
||||
# This will scrape the whitelisted bridges in the current state (port 3000) and the PR state (port 3001) of
|
||||
# RSS-Bridge, generate a feed for each of the bridges and save the output as html files.
|
||||
# It also replaces the default static CSS link with a hardcoded link to @em92's public instance, so viewing
|
||||
# It also adds a <base> tag with the URL of em's public instance, so viewing
|
||||
# the HTML file locally will actually work as designed.
|
||||
|
||||
def testBridges(bridges,status):
|
||||
for bridge in bridges:
|
||||
if bridge.get('data-ref'): # Some div entries are empty, this ignores those
|
||||
bridgeid = bridge.get('id')
|
||||
bridgeid = bridgeid.split('-')[1] # this extracts a readable bridge name from the bridge metadata
|
||||
print(bridgeid + "\n")
|
||||
bridgestring = '/?action=display&bridge=' + bridgeid + '&format=Html'
|
||||
forms = bridge.find_all("form")
|
||||
formid = 1
|
||||
for form in forms:
|
||||
# a bridge can have multiple contexts, named 'forms' in html
|
||||
# this code will produce a fully working formstring that should create a working feed when called
|
||||
# this will create an example feed for every single context, to test them all
|
||||
formstring = ''
|
||||
errormessages = []
|
||||
parameters = form.find_all("input")
|
||||
lists = form.find_all("select")
|
||||
# this for/if mess cycles through all available input parameters, checks if it required, then pulls
|
||||
# the default or examplevalue and then combines it all together into the formstring
|
||||
# if an example or default value is missing for a required attribute, it will throw an error
|
||||
# any non-required fields are not tested!!!
|
||||
for parameter in parameters:
|
||||
if parameter.get('type') == 'hidden' and parameter.get('name') == 'context':
|
||||
cleanvalue = parameter.get('value').replace(" ","+")
|
||||
formstring = formstring + '&' + parameter.get('name') + '=' + cleanvalue
|
||||
if parameter.get('type') == 'number' or parameter.get('type') == 'text':
|
||||
if parameter.has_attr('required'):
|
||||
if parameter.get('placeholder') == '':
|
||||
if parameter.get('value') == '':
|
||||
errormessages.append(parameter.get('name'))
|
||||
else:
|
||||
formstring = formstring + '&' + parameter.get('name') + '=' + parameter.get('value')
|
||||
class Instance:
    """Plain record describing one RSS-Bridge instance to scrape.

    Both attributes default to an empty string and are expected to be
    assigned on the instance after construction.
    """

    url: str = ''  # address that is requested for this instance
    name: str = ''  # label for this instance; may stay empty
|
||||
|
||||
def main(instances: Iterable[Instance], with_upload: bool, with_reduced_upload: bool, title: str, output_file: str):
    """Test the bridges of every instance and write the results as a Markdown table.

    For each instance the page at ``instance.url`` is fetched, its
    ``.bridge-card`` elements are collected and handed to ``testBridges``.
    The rows returned for all instances are sorted and written to
    ``output_file`` (UTF-8) under a ``## {title}`` heading, followed by a
    "last change" line carrying the time at which this function started.

    ``with_upload`` and ``with_reduced_upload`` are passed through to
    ``testBridges`` unchanged.
    """
    started_at = datetime.now()
    rows = []
    for instance in instances:
        # Grab the instance's main page and turn it into soup.
        front_page = requests.get(instance.url)
        parsed_page = BeautifulSoup(front_page.content, "html.parser")
        # Every bridge on the page is rendered as a '.bridge-card' element.
        cards = parsed_page.select('.bridge-card')
        rows.extend(testBridges(instance, cards, with_upload, with_reduced_upload))

    with open(file=output_file, mode='w+', encoding='utf-8') as report:
        joined_rows = '\n'.join(sorted(rows))
        # .strip() drops the newline after the opening quotes and the
        # whitespace before the closing quotes.
        report.write(f'''
## {title}
| Bridge | Context | Status |
| - | - | - |
{joined_rows}

*last change: {started_at.strftime("%A %Y-%m-%d %H:%M:%S")}*
'''.strip())
|
||||
|
||||
def testBridges(instance: Instance, bridge_cards: Iterable, with_upload: bool, with_reduced_upload: bool) -> Iterable:
|
||||
instance_suffix = ''
|
||||
if instance.name:
|
||||
instance_suffix = f' ({instance.name})'
|
||||
table_rows = []
|
||||
for bridge_card in bridge_cards:
|
||||
bridgeid = bridge_card.get('id')
|
||||
bridgeid = bridgeid.split('-')[1] # this extracts a readable bridge name from the bridge metadata
|
||||
print(f'{bridgeid}{instance_suffix}')
|
||||
bridgestring = '/?action=display&bridge=' + bridgeid + '&format=Html'
|
||||
bridge_name = bridgeid.replace('Bridge', '')
|
||||
context_forms = bridge_card.find_all("form")
|
||||
form_number = 1
|
||||
for context_form in context_forms:
|
||||
# a bridge can have multiple contexts, named 'forms' in html
|
||||
# this code will produce a fully working formstring that should create a working feed when called
|
||||
# this will create an example feed for every single context, to test them all
|
||||
formstring = ''
|
||||
error_messages = []
|
||||
context_name = '*untitled*'
|
||||
context_name_element = context_form.find_previous_sibling('h5')
|
||||
if context_name_element and context_name_element.text.strip() != '':
|
||||
context_name = context_name_element.text
|
||||
parameters = context_form.find_all("input")
|
||||
lists = context_form.find_all("select")
|
||||
# this for/if mess cycles through all available input parameters, checks if it required, then pulls
|
||||
# the default or examplevalue and then combines it all together into the formstring
|
||||
# if an example or default value is missing for a required attribute, it will throw an error
|
||||
# any non-required fields are not tested!!!
|
||||
for parameter in parameters:
|
||||
if parameter.get('type') == 'hidden' and parameter.get('name') == 'context':
|
||||
cleanvalue = parameter.get('value').replace(" ","+")
|
||||
formstring = formstring + '&' + parameter.get('name') + '=' + cleanvalue
|
||||
if parameter.get('type') == 'number' or parameter.get('type') == 'text':
|
||||
if parameter.has_attr('required'):
|
||||
if parameter.get('placeholder') == '':
|
||||
if parameter.get('value') == '':
|
||||
name_value = parameter.get('name')
|
||||
error_messages.append(f'Missing example or default value for parameter "{name_value}"')
|
||||
else:
|
||||
formstring = formstring + '&' + parameter.get('name') + '=' + parameter.get('placeholder')
|
||||
# same thing, just for checkboxes. If a checkbox is checked per default, it gets added to the formstring
|
||||
if parameter.get('type') == 'checkbox':
|
||||
if parameter.has_attr('checked'):
|
||||
formstring = formstring + '&' + parameter.get('name') + '=on'
|
||||
for listing in lists:
|
||||
selectionvalue = ''
|
||||
listname = listing.get('name')
|
||||
cleanlist = []
|
||||
for option in listing.contents:
|
||||
if 'optgroup' in option.name:
|
||||
cleanlist.extend(option)
|
||||
formstring = formstring + '&' + parameter.get('name') + '=' + parameter.get('value')
|
||||
else:
|
||||
cleanlist.append(option)
|
||||
firstselectionentry = 1
|
||||
for selectionentry in cleanlist:
|
||||
if firstselectionentry:
|
||||
formstring = formstring + '&' + parameter.get('name') + '=' + parameter.get('placeholder')
|
||||
# same thing, just for checkboxes. If a checkbox is checked per default, it gets added to the formstring
|
||||
if parameter.get('type') == 'checkbox':
|
||||
if parameter.has_attr('checked'):
|
||||
formstring = formstring + '&' + parameter.get('name') + '=on'
|
||||
for listing in lists:
|
||||
selectionvalue = ''
|
||||
listname = listing.get('name')
|
||||
cleanlist = []
|
||||
options = listing.find_all('option')
|
||||
for option in options:
|
||||
if 'optgroup' in option.name:
|
||||
cleanlist.extend(option)
|
||||
else:
|
||||
cleanlist.append(option)
|
||||
firstselectionentry = 1
|
||||
for selectionentry in cleanlist:
|
||||
if firstselectionentry:
|
||||
selectionvalue = selectionentry.get('value')
|
||||
firstselectionentry = 0
|
||||
else:
|
||||
if 'selected' in selectionentry.attrs:
|
||||
selectionvalue = selectionentry.get('value')
|
||||
firstselectionentry = 0
|
||||
else:
|
||||
if 'selected' in selectionentry.attrs:
|
||||
selectionvalue = selectionentry.get('value')
|
||||
break
|
||||
formstring = formstring + '&' + listname + '=' + selectionvalue
|
||||
if not errormessages:
|
||||
# if all example/default values are present, form the full request string, run the request, replace the static css
|
||||
# file with the url of em's public instance and then upload it to termpad.com, a pastebin-like-site.
|
||||
r = requests.get(URL + bridgestring + formstring)
|
||||
pagetext = r.text.replace('static/style.css','https://rss-bridge.org/bridge01/static/style.css')
|
||||
pagetext = pagetext.encode("utf_8")
|
||||
termpad = requests.post(url="https://termpad.com/", data=pagetext)
|
||||
termpadurl = termpad.text
|
||||
termpadurl = termpadurl.replace('termpad.com/','termpad.com/raw/')
|
||||
termpadurl = termpadurl.replace('\n','')
|
||||
with open(os.getcwd() + '/comment.txt', 'a+') as file:
|
||||
file.write("\n")
|
||||
file.write("| [`" + bridgeid + '-' + status + '-context' + str(formid) + "`](" + termpadurl + ") | " + date_time + " |")
|
||||
break
|
||||
formstring = formstring + '&' + listname + '=' + selectionvalue
|
||||
termpad_url = 'about:blank'
|
||||
if error_messages:
|
||||
status = '<br>'.join(map(lambda m: f'❌ `{m}`', error_messages))
|
||||
else:
|
||||
# if all example/default values are present, form the full request string, run the request, add a <base> tag with
|
||||
# the url of em's public instance to the response text (so that relative paths work, e.g. to the static css file) and
|
||||
# then upload it to termpad.com, a pastebin-like-site.
|
||||
response = requests.get(instance.url + bridgestring + formstring)
|
||||
page_text = response.text.replace('<head>','<head><base href="https://rss-bridge.org/bridge01/" target="_blank">')
|
||||
page_text = page_text.encode("utf_8")
|
||||
soup = BeautifulSoup(page_text, "html.parser")
|
||||
status_messages = []
|
||||
if response.status_code != 200:
|
||||
status_messages += [f'❌ `HTTP status {response.status_code} {response.reason}`']
|
||||
else:
|
||||
# if there are errors (which means that a required value has no example or default value), log out which error appeared
|
||||
termpad = requests.post(url="https://termpad.com/", data=str(errormessages))
|
||||
termpadurl = termpad.text
|
||||
termpadurl = termpadurl.replace('termpad.com/','termpad.com/raw/')
|
||||
termpadurl = termpadurl.replace('\n','')
|
||||
with open(os.getcwd() + '/comment.txt', 'a+') as file:
|
||||
file.write("\n")
|
||||
file.write("| [`" + bridgeid + '-' + status + '-context' + str(formid) + "`](" + termpadurl + ") | " + date_time + " |")
|
||||
formid += 1
|
||||
feed_items = soup.select('.feeditem')
|
||||
feed_items_length = len(feed_items)
|
||||
if feed_items_length <= 0:
|
||||
status_messages += [f'⚠️ `The feed has no items`']
|
||||
elif feed_items_length == 1 and len(soup.select('.error')) > 0:
|
||||
status_messages += [f'❌ `{getFirstLine(feed_items[0].text)}`']
|
||||
status_messages += map(lambda e: f'❌ `{getFirstLine(e.text)}`', soup.select('.error .error-type') + soup.select('.error .error-message'))
|
||||
for item_element in soup.select('.feeditem'): # remove all feed items to not accidentally selected <pre> tags from item content
|
||||
item_element.decompose()
|
||||
status_messages += map(lambda e: f'⚠️ `{getFirstLine(e.text)}`', soup.find_all('pre'))
|
||||
status_messages = list(dict.fromkeys(status_messages)) # remove duplicates
|
||||
status = '<br>'.join(status_messages)
|
||||
status_is_ok = status == '';
|
||||
if status_is_ok:
|
||||
status = '✔️'
|
||||
if with_upload and (not with_reduced_upload or not status_is_ok):
|
||||
termpad = requests.post(url="https://termpad.com/", data=page_text)
|
||||
termpad_url = termpad.text.strip()
|
||||
termpad_url = termpad_url.replace('termpad.com/','termpad.com/raw/')
|
||||
table_rows.append(f'| {bridge_name} | [{form_number} {context_name}{instance_suffix}]({termpad_url}) | {status} |')
|
||||
form_number += 1
|
||||
return table_rows
|
||||
|
||||
gitstatus = ["current", "pr"]
|
||||
now = datetime.now()
|
||||
date_time = now.strftime("%Y-%m-%d, %H:%M:%S")
|
||||
def getFirstLine(value: str) -> str:
    """Return the cleaned-up first line of *value*.

    The value is stripped of surrounding whitespace, a leading
    ``[...] rssbridge.`` prefix is removed, and every pipe and backtick
    character is dropped (text that can break the table or is simply
    unnecessary). The first line of what remains is returned; when it is
    longer than 250 characters it is cut to 250 characters and ``...`` is
    appended. An empty string is returned when there is no line at all.
    """
    # Raw string: \[ \] \s \. \| are regex escapes, not valid string escapes;
    # without the r-prefix Python emits an invalid-escape-sequence warning.
    clean_value = re.sub(r'^\[[^\]]+\]\s*rssbridge\.|[\|`]', '', value.strip())
    # splitlines() yields nothing for an empty string, hence the '' default.
    first_line = next(iter(clean_value.splitlines()), '')
    max_length = 250
    if len(first_line) > max_length:
        first_line = first_line[:max_length] + '...'
    return first_line
|
||||
|
||||
with open(os.getcwd() + '/comment.txt', 'w+') as file:
|
||||
file.write(''' ## Pull request artifacts
|
||||
| file | last change |
|
||||
| ---- | ------ |''')
|
||||
|
||||
for status in gitstatus: # run this twice, once for the current version, once for the PR version
|
||||
if status == "current":
|
||||
port = "3000" # both ports are defined in the corresponding workflow .yml file
|
||||
elif status == "pr":
|
||||
port = "3001"
|
||||
URL = "http://localhost:" + port
|
||||
page = requests.get(URL) # Use python requests to grab the rss-bridge main page
|
||||
soup = BeautifulSoup(page.content, "html.parser") # use bs4 to turn the page into soup
|
||||
bridges = soup.find_all("section") # get a soup-formatted list of all bridges on the rss-bridge page
|
||||
testBridges(bridges,status) # run the main scraping code with the list of bridges and the info if this is for the current version or the pr version
|
||||
if __name__ == '__main__':
    # Command-line entry point: parse the options, build the list of
    # instances to test and hand everything to main().
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('--instances', nargs='+')
    arg_parser.add_argument('--no-upload', action='store_true')
    arg_parser.add_argument('--reduced-upload', action='store_true')
    arg_parser.add_argument('--title', default='Pull request artifacts')
    arg_parser.add_argument('--output-file', default=os.getcwd() + '/comment.txt')
    cli_args = arg_parser.parse_args()

    # Collect (name, url) pairs first. Each --instances value is either
    # "URL" or "URL::NAME"; without the option the two default local
    # instances ("current" on port 3000, "pr" on port 3001) are used.
    if cli_args.instances:
        targets = []
        for spec in cli_args.instances:
            pieces = spec.split('::')
            targets.append((pieces[1] if len(pieces) >= 2 else '', pieces[0]))
    else:
        targets = [
            ('current', 'http://localhost:3000'),
            ('pr', 'http://localhost:3001'),
        ]

    instances = []
    for target_name, target_url in targets:
        instance = Instance()
        instance.name = target_name
        instance.url = target_url
        instances.append(instance)

    upload_enabled = not cli_args.no_upload
    main(
        instances=instances,
        with_upload=upload_enabled,
        # a reduced upload only makes sense when uploading is enabled at all
        with_reduced_upload=cli_args.reduced_upload and upload_enabled,
        title=cli_args.title,
        output_file=cli_args.output_file,
    )
|
4
.github/workflows/prhtmlgenerator.yml
vendored
4
.github/workflows/prhtmlgenerator.yml
vendored
@@ -8,6 +8,8 @@ jobs:
|
||||
test-pr:
|
||||
name: Generate HTML
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
PYTHONUNBUFFERED: 1
|
||||
# Needs additional permissions https://github.com/actions/first-interaction/issues/10#issuecomment-1041402989
|
||||
steps:
|
||||
- name: Check out self
|
||||
@@ -22,7 +24,7 @@ jobs:
|
||||
wget https://raw.githubusercontent.com/$GITHUB_REPOSITORY/${{ github.event.pull_request.base.ref }}/.github/prtester.py;
|
||||
wget https://patch-diff.githubusercontent.com/raw/$GITHUB_REPOSITORY/pull/$PR.patch;
|
||||
touch DEBUG;
|
||||
cat $PR.patch | grep "\bbridges/.*Bridge\.php\b" | sed "s=.*\bbridges/\(.*\)Bridge\.php\b.*=\1=g" | sort | uniq > whitelist.txt
|
||||
cat $PR.patch | grep "\bbridges/[A-Za-z0-9]*Bridge\.php\b" | sed "s=.*\bbridges/\([A-Za-z0-9]*\)Bridge\.php\b.*=\1=g" | sort | uniq > whitelist.txt
|
||||
- name: Start Docker - Current
|
||||
run: |
|
||||
docker run -d -v $GITHUB_WORKSPACE/whitelist.txt:/app/whitelist.txt -v $GITHUB_WORKSPACE/DEBUG:/app/DEBUG -p 3000:80 ghcr.io/rss-bridge/rss-bridge:latest
|
||||
|
1
.gitignore
vendored
1
.gitignore
vendored
@@ -6,7 +6,6 @@ data/
|
||||
*.pydevproject
|
||||
.project
|
||||
.metadata
|
||||
bin/
|
||||
tmp/
|
||||
*.tmp
|
||||
*.bak
|
||||
|
@@ -38,7 +38,7 @@ ENV CURL_IMPERSONATE ff91esr
|
||||
|
||||
COPY ./config/nginx.conf /etc/nginx/sites-available/default
|
||||
COPY ./config/php-fpm.conf /etc/php/8.2/fpm/pool.d/rss-bridge.conf
|
||||
COPY ./config/php.ini /etc/php/8.2/fpm/conf.d/90-rss-bridge.conf
|
||||
COPY ./config/php.ini /etc/php/8.2/fpm/conf.d/90-rss-bridge.ini
|
||||
|
||||
COPY --chown=www-data:www-data ./ /app/
|
||||
|
||||
|
256
README.md
256
README.md
@@ -2,16 +2,25 @@
|
||||
|
||||

|
||||
|
||||
RSS-Bridge is a web application.
|
||||
RSS-Bridge is a PHP web application.
|
||||
|
||||
It generates web feeds for websites that don't have one.
|
||||
|
||||
Officially hosted instance: https://rss-bridge.org/bridge01/
|
||||
|
||||
IRC channel #rssbridge at https://libera.chat/
|
||||
|
||||
[Full documentation](https://rss-bridge.github.io/rss-bridge/index.html)
|
||||
|
||||
Alternatively find another
|
||||
[public instance](https://rss-bridge.github.io/rss-bridge/General/Public_Hosts.html).
|
||||
|
||||
Requires minimum PHP 7.4.
|
||||
|
||||
|
||||
[](UNLICENSE)
|
||||
[](https://github.com/rss-bridge/rss-bridge/releases/latest)
|
||||
[](https://web.libera.chat/#rssbridge)
|
||||
[](https://matrix.to/#/#rssbridge:libera.chat)
|
||||
[](https://github.com/RSS-Bridge/rss-bridge/actions)
|
||||
|
||||
|||
|
||||
@@ -19,9 +28,8 @@ Officially hosted instance: https://rss-bridge.org/bridge01/
|
||||
|||
|
||||
|||
|
||||
|||
|
||||
|||
|
||||
|
||||
## A subset of bridges (17/412)
|
||||
## A subset of bridges (16/447)
|
||||
|
||||
* `CssSelectorBridge`: [Scrape out a feed using CSS selectors](https://rss-bridge.org/bridge01/#bridge-CssSelectorBridge)
|
||||
* `FeedMergeBridge`: [Combine multiple feeds into one](https://rss-bridge.org/bridge01/#bridge-FeedMergeBridge)
|
||||
@@ -36,66 +44,149 @@ Officially hosted instance: https://rss-bridge.org/bridge01/
|
||||
* `ThePirateBayBridge:` [Fetches torrents by search/user/category](https://rss-bridge.org/bridge01/#bridge-ThePirateBayBridge)
|
||||
* `TikTokBridge`: [Fetches posts by username](https://rss-bridge.org/bridge01/#bridge-TikTokBridge)
|
||||
* `TwitchBridge`: [Fetches videos from channel](https://rss-bridge.org/bridge01/#bridge-TwitchBridge)
|
||||
* `TwitterBridge`: [Fetches tweets](https://rss-bridge.org/bridge01/#bridge-TwitterBridge)
|
||||
* `VkBridge`: [Fetches posts from user/group](https://rss-bridge.org/bridge01/#bridge-VkBridge)
|
||||
* `XPathBridge`: [Scrape out a feed using XPath expressions](https://rss-bridge.org/bridge01/#bridge-XPathBridge)
|
||||
* `YoutubeBridge`: [Fetches videos by username/channel/playlist/search](https://rss-bridge.org/bridge01/#bridge-YoutubeBridge)
|
||||
* `YouTubeCommunityTabBridge`: [Fetches posts from a channel's community tab](https://rss-bridge.org/bridge01/#bridge-YouTubeCommunityTabBridge)
|
||||
|
||||
[Full documentation](https://rss-bridge.github.io/rss-bridge/index.html)
|
||||
|
||||
Check out RSS-Bridge right now on https://rss-bridge.org/bridge01/
|
||||
|
||||
Alternatively find another
|
||||
[public instance](https://rss-bridge.github.io/rss-bridge/General/Public_Hosts.html).
|
||||
|
||||
## Tutorial
|
||||
|
||||
### Install with composer or git
|
||||
### How to install on traditional shared web hosting
|
||||
|
||||
Requires minimum PHP 7.4.
|
||||
RSS-Bridge can simply be unzipped into a web folder and should work instantly.
|
||||
|
||||
Latest zip as of Sep 2023: https://github.com/RSS-Bridge/rss-bridge/archive/refs/tags/2023-09-24.zip
|
||||
|
||||
### How to install on Debian 12 (nginx + php-fpm)
|
||||
|
||||
These instructions have been tested on a fresh Debian 12 VM from Digital Ocean (1vcpu-512mb-10gb, 5 USD/month).
|
||||
|
||||
```shell
|
||||
apt install nginx php-fpm php-mbstring php-simplexml php-curl
|
||||
timedatectl set-timezone Europe/Oslo
|
||||
|
||||
apt install git nginx php8.2-fpm php-mbstring php-simplexml php-curl
|
||||
|
||||
# Create a new user account
|
||||
useradd --shell /bin/bash --create-home rss-bridge
|
||||
|
||||
cd /var/www
|
||||
|
||||
# Create folder and change ownership
|
||||
mkdir rss-bridge && chown rss-bridge:rss-bridge rss-bridge/
|
||||
|
||||
# Become user
|
||||
su rss-bridge
|
||||
|
||||
# Fetch latest master
|
||||
git clone https://github.com/RSS-Bridge/rss-bridge.git rss-bridge/
|
||||
cd rss-bridge
|
||||
|
||||
# Copy over the default config
|
||||
cp -v config.default.ini.php config.ini.php
|
||||
|
||||
# Give full permissions only to owner (rss-bridge)
|
||||
chmod 700 -R ./
|
||||
|
||||
# Give read and execute to others (nginx and php-fpm)
|
||||
chmod o+rx ./ ./static
|
||||
|
||||
# Give read to others (nginx)
|
||||
chmod o+r -R ./static
|
||||
```
|
||||
|
||||
Nginx config:
|
||||
|
||||
```nginx
|
||||
# /etc/nginx/sites-enabled/rss-bridge.conf
|
||||
|
||||
server {
|
||||
listen 80;
|
||||
server_name example.com;
|
||||
access_log /var/log/nginx/rss-bridge.access.log;
|
||||
error_log /var/log/nginx/rss-bridge.error.log;
|
||||
|
||||
# Intentionally not setting a root folder here
|
||||
|
||||
# autoindex is off by default but feels good to explicitly turn off
|
||||
autoindex off;
|
||||
|
||||
# Static content only served here
|
||||
location /static/ {
|
||||
alias /var/www/rss-bridge/static/;
|
||||
}
|
||||
|
||||
# Pass off to php-fpm only when location is exactly /
|
||||
location = / {
|
||||
root /var/www/rss-bridge/;
|
||||
include snippets/fastcgi-php.conf;
|
||||
fastcgi_pass unix:/run/php/rss-bridge.sock;
|
||||
}
|
||||
|
||||
# Reduce spam
|
||||
location = /favicon.ico {
|
||||
access_log off;
|
||||
log_not_found off;
|
||||
}
|
||||
|
||||
# Reduce spam
|
||||
location = /robots.txt {
|
||||
access_log off;
|
||||
log_not_found off;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
PHP FPM pool config:
|
||||
```ini
|
||||
; /etc/php/8.2/fpm/pool.d/rss-bridge.conf
|
||||
|
||||
[rss-bridge]
|
||||
|
||||
user = rss-bridge
|
||||
group = rss-bridge
|
||||
|
||||
listen = /run/php/rss-bridge.sock
|
||||
|
||||
listen.owner = www-data
|
||||
listen.group = www-data
|
||||
|
||||
pm = static
|
||||
pm.max_children = 10
|
||||
pm.max_requests = 500
|
||||
```
|
||||
|
||||
PHP ini config:
|
||||
```ini
|
||||
; /etc/php/8.2/fpm/conf.d/30-rss-bridge.ini
|
||||
|
||||
max_execution_time = 15
|
||||
memory_limit = 64M
|
||||
```
|
||||
|
||||
Restart fpm and nginx:
|
||||
|
||||
```shell
|
||||
# Lint and restart php-fpm
|
||||
php-fpm8.2 -t
|
||||
systemctl restart php8.2-fpm
|
||||
|
||||
# Lint and restart nginx
|
||||
nginx -t
|
||||
systemctl restart nginx
|
||||
```
|
||||
|
||||
### How to install from Composer
|
||||
|
||||
Install the latest release.
|
||||
|
||||
```shell
|
||||
cd /var/www
|
||||
composer create-project -v --no-dev rss-bridge/rss-bridge
|
||||
```
|
||||
|
||||
```shell
|
||||
cd /var/www
|
||||
git clone https://github.com/RSS-Bridge/rss-bridge.git
|
||||
```
|
||||
### How to install with Caddy
|
||||
|
||||
Config:
|
||||
|
||||
```shell
|
||||
# Give the http user write permission to the cache folder
|
||||
chown www-data:www-data /var/www/rss-bridge/cache
|
||||
|
||||
# Optionally copy over the default config file
|
||||
cp config.default.ini.php config.ini.php
|
||||
```
|
||||
|
||||
Example config for nginx:
|
||||
|
||||
```nginx
|
||||
# /etc/nginx/sites-enabled/rssbridge
|
||||
server {
|
||||
listen 80;
|
||||
server_name example.com;
|
||||
root /var/www/rss-bridge;
|
||||
index index.php;
|
||||
|
||||
location ~ \.php$ {
|
||||
include snippets/fastcgi-php.conf;
|
||||
fastcgi_read_timeout 60s;
|
||||
fastcgi_pass unix:/run/php/php-fpm.sock;
|
||||
}
|
||||
}
|
||||
```
|
||||
TODO. See https://github.com/RSS-Bridge/rss-bridge/issues/3785
|
||||
|
||||
### Install from Docker Hub:
|
||||
|
||||
@@ -154,8 +245,9 @@ Browse http://localhost:3000/
|
||||
[](https://my.scalingo.com/deploy?source=https://github.com/sebsauvage/rss-bridge)
|
||||
[](https://heroku.com/deploy)
|
||||
[](https://www.cloudron.io/store/com.rssbridgeapp.cloudronapp.html)
|
||||
[](https://www.pikapods.com/pods?run=rssbridge)
|
||||
|
||||
The Heroku quick deploy currently does not work. It might possibly work if you fork this repo and
|
||||
The Heroku quick deploy currently does not work. It might work if you fork this repo and
|
||||
modify the `repository` in `scalingo.json`. See https://github.com/RSS-Bridge/rss-bridge/issues/2688
|
||||
|
||||
Learn more in
|
||||
@@ -163,6 +255,64 @@ Learn more in
|
||||
|
||||
## How-to
|
||||
|
||||
### How to fix "Access denied."
|
||||
|
||||
Output is from php-fpm. It is unable to read index.php.
|
||||
|
||||
chown rss-bridge:rss-bridge /var/www/rss-bridge/index.php
|
||||
|
||||
### How to password-protect the instance (token)
|
||||
|
||||
Modify `config.ini.php`:
|
||||
|
||||
[authentication]
|
||||
|
||||
token = "hunter2"
|
||||
|
||||
### How to remove all cache items
|
||||
|
||||
As current user:
|
||||
|
||||
bin/cache-clear
|
||||
|
||||
As user rss-bridge:
|
||||
|
||||
sudo -u rss-bridge bin/cache-clear
|
||||
|
||||
As root:
|
||||
|
||||
sudo bin/cache-clear
|
||||
|
||||
### How to remove all expired cache items
|
||||
|
||||
bin/cache-prune
|
||||
|
||||
### How to fix "PHP Fatal error: Uncaught Exception: The FileCache path is not writable"
|
||||
|
||||
```shell
|
||||
# Give rss-bridge ownership
|
||||
chown rss-bridge:rss-bridge -R /var/www/rss-bridge/cache
|
||||
|
||||
# Or, give www-data ownership
|
||||
chown www-data:www-data -R /var/www/rss-bridge/cache
|
||||
|
||||
# Or, give everyone write permission
|
||||
chmod 777 -R /var/www/rss-bridge/cache
|
||||
|
||||
# Or last ditch effort (CAREFUL)
|
||||
rm -rf /var/www/rss-bridge/cache/ && mkdir /var/www/rss-bridge/cache/
|
||||
```
|
||||
|
||||
### How to fix "attempt to write a readonly database"
|
||||
|
||||
The sqlite files (db, wal and shm) are not writeable.
|
||||
|
||||
chown -v rss-bridge:rss-bridge cache/*
|
||||
|
||||
### How to fix "Unable to prepare statement: 1, no such table: storage"
|
||||
|
||||
rm cache/*
|
||||
|
||||
### How to create a new bridge from scratch
|
||||
|
||||
Create the new bridge in e.g. `bridges/BearBlogBridge.php`:
|
||||
@@ -192,8 +342,6 @@ Learn more in [bridge api](https://rss-bridge.github.io/rss-bridge/Bridge_API/in
|
||||
|
||||
### How to enable all bridges
|
||||
|
||||
Modify `config.ini.php`:
|
||||
|
||||
enabled_bridges[] = *
|
||||
|
||||
### How to enable some bridges
|
||||
@@ -248,9 +396,9 @@ Modify `report_limit` so that an error must occur 3 times before it is reported.
|
||||
; Defines how often an error must occur before it is reported to the user
|
||||
report_limit = 3
|
||||
|
||||
### How to password-protect the instance
|
||||
The report count is reset to 0 each day.
|
||||
|
||||
HTTP basic access authentication:
|
||||
### How to password-protect the instance (HTTP Basic Auth)
|
||||
|
||||
[authentication]
|
||||
|
||||
@@ -266,7 +414,7 @@ https://alice:cat@rss-bridge.org/bridge01/?action=display&bridge=FabriceBellardB
|
||||
|
||||
### How to create a new output format
|
||||
|
||||
[Create a new format](https://rss-bridge.github.io/rss-bridge/Format_API/index.html).
|
||||
See `formats/PlaintextFormat.php` for an example.
|
||||
|
||||
### How to run unit tests and linter
|
||||
|
||||
@@ -275,6 +423,8 @@ These commands require that you have installed the dev dependencies in `composer
|
||||
./vendor/bin/phpunit
|
||||
./vendor/bin/phpcs --standard=phpcs.xml --warning-severity=0 --extensions=php -p ./
|
||||
|
||||
https://github.com/squizlabs/PHP_CodeSniffer/wiki
|
||||
|
||||
### How to spawn a minimal development environment
|
||||
|
||||
php -S 127.0.0.1:9001
|
||||
@@ -298,7 +448,7 @@ Cached files are deleted automatically after 24 hours.
|
||||
RSS-Bridge allows you to take full control over which bridges are displayed to the user.
|
||||
That way you can host your own RSS-Bridge service with your favorite collection of bridges!
|
||||
|
||||
Current maintainers (as of 2023): @dvikan and @Mynacol #2519
|
||||
Current maintainers (as of 2024): @dvikan and @Mynacol #2519
|
||||
|
||||
## Reference
|
||||
|
||||
|
@@ -1,17 +1,5 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* This file is part of RSS-Bridge, a PHP project capable of generating RSS and
|
||||
* Atom feeds for websites that don't have one.
|
||||
*
|
||||
* For the full license information, please view the UNLICENSE file distributed
|
||||
* with this source code.
|
||||
*
|
||||
* @package Core
|
||||
* @license http://unlicense.org/ UNLICENSE
|
||||
* @link https://github.com/rss-bridge/rss-bridge
|
||||
*/
|
||||
|
||||
/**
|
||||
* Checks if the website for a given bridge is reachable.
|
||||
*
|
||||
@@ -31,13 +19,13 @@ class ConnectivityAction implements ActionInterface
|
||||
$this->bridgeFactory = new BridgeFactory();
|
||||
}
|
||||
|
||||
public function execute(array $request)
|
||||
public function execute(Request $request)
|
||||
{
|
||||
if (!Debug::isEnabled()) {
|
||||
return new Response('This action is only available in debug mode!', 403);
|
||||
}
|
||||
|
||||
$bridgeName = $request['bridge'] ?? null;
|
||||
$bridgeName = $request->get('bridge');
|
||||
if (!$bridgeName) {
|
||||
return render_template('connectivity.html.php');
|
||||
}
|
||||
|
@@ -1,29 +1,17 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* This file is part of RSS-Bridge, a PHP project capable of generating RSS and
|
||||
* Atom feeds for websites that don't have one.
|
||||
*
|
||||
* For the full license information, please view the UNLICENSE file distributed
|
||||
* with this source code.
|
||||
*
|
||||
* @package Core
|
||||
* @license http://unlicense.org/ UNLICENSE
|
||||
* @link https://github.com/rss-bridge/rss-bridge
|
||||
*/
|
||||
|
||||
class DetectAction implements ActionInterface
|
||||
{
|
||||
public function execute(array $request)
|
||||
public function execute(Request $request)
|
||||
{
|
||||
$targetURL = $request['url'] ?? null;
|
||||
$format = $request['format'] ?? null;
|
||||
$url = $request->get('url');
|
||||
$format = $request->get('format');
|
||||
|
||||
if (!$targetURL) {
|
||||
throw new \Exception('You must specify a url!');
|
||||
if (!$url) {
|
||||
return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'You must specify a url']));
|
||||
}
|
||||
if (!$format) {
|
||||
throw new \Exception('You must specify a format!');
|
||||
return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'You must specify a format']));
|
||||
}
|
||||
|
||||
$bridgeFactory = new BridgeFactory();
|
||||
@@ -35,19 +23,23 @@ class DetectAction implements ActionInterface
|
||||
|
||||
$bridge = $bridgeFactory->create($bridgeClassName);
|
||||
|
||||
$bridgeParams = $bridge->detectParameters($targetURL);
|
||||
$bridgeParams = $bridge->detectParameters($url);
|
||||
|
||||
if (is_null($bridgeParams)) {
|
||||
if (!$bridgeParams) {
|
||||
continue;
|
||||
}
|
||||
|
||||
$bridgeParams['bridge'] = $bridgeClassName;
|
||||
$bridgeParams['format'] = $format;
|
||||
|
||||
$url = '?action=display&' . http_build_query($bridgeParams);
|
||||
return new Response('', 301, ['location' => $url]);
|
||||
$query = [
|
||||
'action' => 'display',
|
||||
'bridge' => $bridgeClassName,
|
||||
'format' => $format,
|
||||
];
|
||||
$query = array_merge($query, $bridgeParams);
|
||||
return new Response('', 301, ['location' => '?' . http_build_query($query)]);
|
||||
}
|
||||
|
||||
throw new \Exception('No bridge found for given URL: ' . $targetURL);
|
||||
return new Response(render(__DIR__ . '/../templates/error.html.php', [
|
||||
'message' => 'No bridge found for given URL: ' . $url,
|
||||
]));
|
||||
}
|
||||
}
|
||||
|
@@ -11,15 +11,13 @@ class DisplayAction implements ActionInterface
|
||||
$this->logger = RssBridge::getLogger();
|
||||
}
|
||||
|
||||
public function execute(array $request)
|
||||
public function execute(Request $request)
|
||||
{
|
||||
if (Configuration::getConfig('system', 'enable_maintenance_mode')) {
|
||||
return new Response(render(__DIR__ . '/../templates/error.html.php', [
|
||||
'title' => '503 Service Unavailable',
|
||||
'message' => 'RSS-Bridge is down for maintenance.',
|
||||
]), 503);
|
||||
}
|
||||
$cacheKey = 'http_' . json_encode($request);
|
||||
$bridgeName = $request->get('bridge');
|
||||
$format = $request->get('format');
|
||||
$noproxy = $request->get('_noproxy');
|
||||
|
||||
$cacheKey = 'http_' . json_encode($request->toArray());
|
||||
/** @var Response $cachedResponse */
|
||||
$cachedResponse = $this->cache->get($cacheKey);
|
||||
if ($cachedResponse) {
|
||||
@@ -37,7 +35,6 @@ class DisplayAction implements ActionInterface
|
||||
return $cachedResponse;
|
||||
}
|
||||
|
||||
$bridgeName = $request['bridge'] ?? null;
|
||||
if (!$bridgeName) {
|
||||
return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'Missing bridge parameter']), 400);
|
||||
}
|
||||
@@ -46,7 +43,7 @@ class DisplayAction implements ActionInterface
|
||||
if (!$bridgeClassName) {
|
||||
return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'Bridge not found']), 404);
|
||||
}
|
||||
$format = $request['format'] ?? null;
|
||||
|
||||
if (!$format) {
|
||||
return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'You must specify a format']), 400);
|
||||
}
|
||||
@@ -54,7 +51,7 @@ class DisplayAction implements ActionInterface
|
||||
return new Response(render(__DIR__ . '/../templates/error.html.php', ['message' => 'This bridge is not whitelisted']), 400);
|
||||
}
|
||||
|
||||
$noproxy = $request['_noproxy'] ?? null;
|
||||
|
||||
if (
|
||||
Configuration::getConfig('proxy', 'url')
|
||||
&& Configuration::getConfig('proxy', 'by_bridge')
|
||||
@@ -71,7 +68,7 @@ class DisplayAction implements ActionInterface
|
||||
$response = $this->createResponse($request, $bridge, $format);
|
||||
|
||||
if ($response->getCode() === 200) {
|
||||
$ttl = $request['_cache_timeout'] ?? null;
|
||||
$ttl = $request->get('_cache_timeout');
|
||||
if (Configuration::getConfig('cache', 'custom_timeout') && $ttl) {
|
||||
$ttl = (int) $ttl;
|
||||
} else {
|
||||
@@ -80,45 +77,55 @@ class DisplayAction implements ActionInterface
|
||||
$this->cache->set($cacheKey, $response, $ttl);
|
||||
}
|
||||
|
||||
if (in_array($response->getCode(), [429, 503])) {
|
||||
$this->cache->set($cacheKey, $response, 60 * 15 + rand(1, 60 * 10)); // average 20m
|
||||
if (in_array($response->getCode(), [403, 429, 503])) {
|
||||
// Cache these responses for about ~20 mins on average
|
||||
$this->cache->set($cacheKey, $response, 60 * 15 + rand(1, 60 * 10));
|
||||
}
|
||||
|
||||
if ($response->getCode() === 500) {
|
||||
$this->cache->set($cacheKey, $response, 60 * 15);
|
||||
}
|
||||
|
||||
if (rand(1, 100) === 2) {
|
||||
$this->cache->prune();
|
||||
}
|
||||
|
||||
return $response;
|
||||
}
|
||||
|
||||
private function createResponse(array $request, BridgeAbstract $bridge, FormatInterface $format)
|
||||
private function createResponse(Request $request, BridgeAbstract $bridge, FormatAbstract $format)
|
||||
{
|
||||
$items = [];
|
||||
$infos = [];
|
||||
$feed = [];
|
||||
|
||||
try {
|
||||
$bridge->loadConfiguration();
|
||||
// Remove parameters that don't concern bridges
|
||||
$bridgeData = array_diff_key($request, array_fill_keys(['action', 'bridge', 'format', '_noproxy', '_cache_timeout', '_error_time'], ''));
|
||||
$bridge->setDatas($bridgeData);
|
||||
$remove = [
|
||||
'token',
|
||||
'action',
|
||||
'bridge',
|
||||
'format',
|
||||
'_noproxy',
|
||||
'_cache_timeout',
|
||||
'_error_time',
|
||||
'_', // Some RSS readers add a cache-busting parameter (_=<timestamp>) to feed URLs, detect and ignore them.
|
||||
];
|
||||
$requestArray = $request->toArray();
|
||||
$input = array_diff_key($requestArray, array_fill_keys($remove, ''));
|
||||
$bridge->setInput($input);
|
||||
$bridge->collectData();
|
||||
$items = $bridge->getItems();
|
||||
if (isset($items[0]) && is_array($items[0])) {
|
||||
$feedItems = [];
|
||||
foreach ($items as $item) {
|
||||
$feedItems[] = new FeedItem($item);
|
||||
$feedItems[] = FeedItem::fromArray($item);
|
||||
}
|
||||
$items = $feedItems;
|
||||
}
|
||||
$infos = [
|
||||
'name' => $bridge->getName(),
|
||||
'uri' => $bridge->getURI(),
|
||||
'donationUri' => $bridge->getDonationURI(),
|
||||
'icon' => $bridge->getIcon()
|
||||
];
|
||||
$feed = $bridge->getFeed();
|
||||
} catch (\Exception $e) {
|
||||
// Probably an exception inside a bridge
|
||||
if ($e instanceof HttpException) {
|
||||
// Reproduce (and log) these responses regardless of error output and report limit
|
||||
if ($e->getCode() === 429) {
|
||||
@@ -151,7 +158,7 @@ class DisplayAction implements ActionInterface
|
||||
}
|
||||
|
||||
$format->setItems($items);
|
||||
$format->setExtraInfos($infos);
|
||||
$format->setFeed($feed);
|
||||
$now = time();
|
||||
$format->setLastModified($now);
|
||||
$headers = [
|
||||
@@ -167,8 +174,8 @@ class DisplayAction implements ActionInterface
|
||||
|
||||
// Create a unique identifier every 24 hours
|
||||
$uniqueIdentifier = urlencode((int)(time() / 86400));
|
||||
$itemTitle = sprintf('Bridge returned error %s! (%s)', $e->getCode(), $uniqueIdentifier);
|
||||
$item->setTitle($itemTitle);
|
||||
$title = sprintf('Bridge returned error %s! (%s)', $e->getCode(), $uniqueIdentifier);
|
||||
$item->setTitle($title);
|
||||
$item->setURI(get_current_url());
|
||||
$item->setTimestamp(time());
|
||||
|
||||
@@ -187,6 +194,7 @@ class DisplayAction implements ActionInterface
|
||||
|
||||
private function logBridgeError($bridgeName, $code)
|
||||
{
|
||||
// todo: it's not really necessary to json encode $report
|
||||
$cacheKey = 'error_reporting_' . $bridgeName . '_' . $code;
|
||||
$report = $this->cache->get($cacheKey);
|
||||
if ($report) {
|
||||
|
@@ -7,12 +7,12 @@
|
||||
*/
|
||||
class FindfeedAction implements ActionInterface
|
||||
{
|
||||
public function execute(array $request)
|
||||
public function execute(Request $request)
|
||||
{
|
||||
$targetURL = $request['url'] ?? null;
|
||||
$format = $request['format'] ?? null;
|
||||
$url = $request->get('url');
|
||||
$format = $request->get('format');
|
||||
|
||||
if (!$targetURL) {
|
||||
if (!$url) {
|
||||
return new Response('You must specify a url', 400);
|
||||
}
|
||||
if (!$format) {
|
||||
@@ -29,7 +29,7 @@ class FindfeedAction implements ActionInterface
|
||||
|
||||
$bridge = $bridgeFactory->create($bridgeClassName);
|
||||
|
||||
$bridgeParams = $bridge->detectParameters($targetURL);
|
||||
$bridgeParams = $bridge->detectParameters($url);
|
||||
|
||||
if ($bridgeParams === null) {
|
||||
continue;
|
||||
@@ -56,7 +56,7 @@ class FindfeedAction implements ActionInterface
|
||||
$bridgeParams['bridge'] = $bridgeClassName;
|
||||
$bridgeParams['format'] = $format;
|
||||
$content = [
|
||||
'url' => get_home_page_url() . '?action=display&' . http_build_query($bridgeParams),
|
||||
'url' => './?action=display&' . http_build_query($bridgeParams),
|
||||
'bridgeParams' => $bridgeParams,
|
||||
'bridgeData' => $bridgeData,
|
||||
'bridgeMeta' => [
|
||||
|
@@ -2,10 +2,9 @@
|
||||
|
||||
final class FrontpageAction implements ActionInterface
|
||||
{
|
||||
public function execute(array $request)
|
||||
public function execute(Request $request)
|
||||
{
|
||||
$messages = [];
|
||||
$showInactive = (bool) ($request['show_inactive'] ?? null);
|
||||
$activeBridges = 0;
|
||||
|
||||
$bridgeFactory = new BridgeFactory();
|
||||
@@ -18,19 +17,15 @@ final class FrontpageAction implements ActionInterface
|
||||
];
|
||||
}
|
||||
|
||||
$formatFactory = new FormatFactory();
|
||||
$formats = $formatFactory->getFormatNames();
|
||||
|
||||
$body = '';
|
||||
foreach ($bridgeClassNames as $bridgeClassName) {
|
||||
if ($bridgeFactory->isEnabled($bridgeClassName)) {
|
||||
$body .= BridgeCard::displayBridgeCard($bridgeClassName, $formats);
|
||||
$body .= BridgeCard::render($bridgeClassName, $request);
|
||||
$activeBridges++;
|
||||
} elseif ($showInactive) {
|
||||
$body .= BridgeCard::displayBridgeCard($bridgeClassName, $formats, false) . PHP_EOL;
|
||||
}
|
||||
}
|
||||
|
||||
// todo: cache this rendered template?
|
||||
return render(__DIR__ . '/../templates/frontpage.html.php', [
|
||||
'messages' => $messages,
|
||||
'admin_email' => Configuration::getConfig('admin', 'email'),
|
||||
@@ -38,7 +33,6 @@ final class FrontpageAction implements ActionInterface
|
||||
'bridges' => $body,
|
||||
'active_bridges' => $activeBridges,
|
||||
'total_bridges' => count($bridgeClassNames),
|
||||
'show_inactive' => $showInactive,
|
||||
]);
|
||||
}
|
||||
}
|
||||
|
@@ -4,7 +4,7 @@ declare(strict_types=1);
|
||||
|
||||
class HealthAction implements ActionInterface
|
||||
{
|
||||
public function execute(array $request)
|
||||
public function execute(Request $request)
|
||||
{
|
||||
$response = [
|
||||
'code' => 200,
|
||||
|
@@ -1,20 +1,8 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* This file is part of RSS-Bridge, a PHP project capable of generating RSS and
|
||||
* Atom feeds for websites that don't have one.
|
||||
*
|
||||
* For the full license information, please view the UNLICENSE file distributed
|
||||
* with this source code.
|
||||
*
|
||||
* @package Core
|
||||
* @license http://unlicense.org/ UNLICENSE
|
||||
* @link https://github.com/rss-bridge/rss-bridge
|
||||
*/
|
||||
|
||||
class ListAction implements ActionInterface
|
||||
{
|
||||
public function execute(array $request)
|
||||
public function execute(Request $request)
|
||||
{
|
||||
$list = new \stdClass();
|
||||
$list->bridges = [];
|
||||
@@ -26,14 +14,14 @@ class ListAction implements ActionInterface
|
||||
$bridge = $bridgeFactory->create($bridgeClassName);
|
||||
|
||||
$list->bridges[$bridgeClassName] = [
|
||||
'status' => $bridgeFactory->isEnabled($bridgeClassName) ? 'active' : 'inactive',
|
||||
'uri' => $bridge->getURI(),
|
||||
'donationUri' => $bridge->getDonationURI(),
|
||||
'name' => $bridge->getName(),
|
||||
'icon' => $bridge->getIcon(),
|
||||
'parameters' => $bridge->getParameters(),
|
||||
'maintainer' => $bridge->getMaintainer(),
|
||||
'description' => $bridge->getDescription()
|
||||
'status' => $bridgeFactory->isEnabled($bridgeClassName) ? 'active' : 'inactive',
|
||||
'uri' => $bridge->getURI(),
|
||||
'donationUri' => $bridge->getDonationURI(),
|
||||
'name' => $bridge->getName(),
|
||||
'icon' => $bridge->getIcon(),
|
||||
'parameters' => $bridge->getParameters(),
|
||||
'maintainer' => $bridge->getMaintainer(),
|
||||
'description' => $bridge->getDescription()
|
||||
];
|
||||
}
|
||||
$list->total = count($list->bridges);
|
||||
|
@@ -1,58 +0,0 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* This file is part of RSS-Bridge, a PHP project capable of generating RSS and
|
||||
* Atom feeds for websites that don't have one.
|
||||
*
|
||||
* For the full license information, please view the UNLICENSE file distributed
|
||||
* with this source code.
|
||||
*
|
||||
* @package Core
|
||||
* @license http://unlicense.org/ UNLICENSE
|
||||
* @link https://github.com/rss-bridge/rss-bridge
|
||||
*/
|
||||
|
||||
/**
 * Action that stores a caller-supplied value in the cache under a key that is
 * namespaced by the bridge's class name.
 *
 * Request parameters read by execute():
 *  - key:    name of the cache entry (required)
 *  - bridge: name of the bridge the entry belongs to (required)
 *  - value:  the value to store (required, may be empty)
 */
class SetBridgeCacheAction implements ActionInterface
{
    private CacheInterface $cache;

    public function __construct()
    {
        $this->cache = RssBridge::getCache();
    }

    /**
     * Validate the request and write the value to the cache.
     *
     * @param array $request Request parameters
     * @throws \Exception When the bridge is unknown or not enabled
     */
    public function execute(array $request)
    {
        // The middleware is invoked with the raw request before anything else is read.
        $authenticationMiddleware = new ApiAuthenticationMiddleware();
        $authenticationMiddleware($request);

        // NOTE(review): returnClientError() is assumed to abort the request (it is used
        // that way elsewhere in the project) - confirm.
        $key = $request['key'] ?? null;
        if (!$key) {
            returnClientError('You must specify key!');
        }

        // Reject a missing bridge name up front instead of handing null to the factory.
        $bridgeName = $request['bridge'] ?? null;
        if (!$bridgeName) {
            returnClientError('You must specify bridge!');
        }

        // An empty value is allowed; an absent one is a client error rather than an
        // undefined-index read.
        if (!array_key_exists('value', $request)) {
            returnClientError('You must specify value!');
        }
        $value = $request['value'];

        $bridgeFactory = new BridgeFactory();

        $bridgeClassName = $bridgeFactory->createBridgeClassName($bridgeName);
        if (!$bridgeClassName) {
            throw new \Exception(sprintf('Bridge not found: %s', $bridgeName));
        }

        // whitelist control
        if (!$bridgeFactory->isEnabled($bridgeClassName)) {
            throw new \Exception('This bridge is not whitelisted', 401);
        }

        $bridge = $bridgeFactory->create($bridgeClassName);
        $bridge->loadConfiguration();

        // The cache key is prefixed with the concrete bridge class name.
        $cacheKey = get_class($bridge) . '_' . $key;
        $ttl = 86400 * 3; // three days, in seconds
        $this->cache->set($cacheKey, $value, $ttl);

        header('Content-Type: text/plain');
        echo 'done';
    }
}
|
14
bin/cache-clear
Executable file
14
bin/cache-clear
Executable file
@@ -0,0 +1,14 @@
|
||||
#!/usr/bin/env php
<?php

/**
 * Command line helper: remove all items from the cache.
 */

require __DIR__ . '/../lib/bootstrap.php';

// The application object is created before the cache is requested, in the same
// order as before. NOTE(review): presumably the constructor sets up what
// RssBridge::getCache() returns - confirm.
$app = new RssBridge();

RssBridge::getCache()->clear();
|
14
bin/cache-prune
Executable file
14
bin/cache-prune
Executable file
@@ -0,0 +1,14 @@
|
||||
#!/usr/bin/env php
<?php

/**
 * Command line helper: remove all expired items from the cache.
 */

require __DIR__ . '/../lib/bootstrap.php';

// The application object is created before the cache is requested, in the same
// order as before. NOTE(review): presumably the constructor sets up what
// RssBridge::getCache() returns - confirm.
$app = new RssBridge();

RssBridge::getCache()->prune();
|
@@ -63,11 +63,13 @@ class ARDAudiothekBridge extends BridgeAbstract
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$oldTz = date_default_timezone_get();
|
||||
$path = $this->getInput('path');
|
||||
$limit = $this->getInput('limit');
|
||||
|
||||
$oldTz = date_default_timezone_get();
|
||||
date_default_timezone_set('Europe/Berlin');
|
||||
|
||||
$pathComponents = explode('/', $this->getInput('path'));
|
||||
$pathComponents = explode('/', $path);
|
||||
if (empty($pathComponents)) {
|
||||
returnClientError('Path may not be empty');
|
||||
}
|
||||
@@ -82,17 +84,21 @@ class ARDAudiothekBridge extends BridgeAbstract
|
||||
}
|
||||
|
||||
$url = self::APIENDPOINT . 'programsets/' . $showID . '/';
|
||||
$rawJSON = getContents($url);
|
||||
$processedJSON = json_decode($rawJSON)->data->programSet;
|
||||
$json1 = getContents($url);
|
||||
$data1 = Json::decode($json1, false);
|
||||
$processedJSON = $data1->data->programSet;
|
||||
if (!$processedJSON) {
|
||||
throw new \Exception('Unable to find show id: ' . $showID);
|
||||
}
|
||||
|
||||
$limit = $this->getInput('limit');
|
||||
$answerLength = 1;
|
||||
$offset = 0;
|
||||
$numberOfElements = 1;
|
||||
|
||||
while ($answerLength != 0 && $offset < $numberOfElements && (is_null($limit) || $offset < $limit)) {
|
||||
$rawJSON = getContents($url . '?offset=' . $offset);
|
||||
$processedJSON = json_decode($rawJSON)->data->programSet;
|
||||
$json2 = getContents($url . '?offset=' . $offset);
|
||||
$data2 = Json::decode($json2, false);
|
||||
$processedJSON = $data2->data->programSet;
|
||||
|
||||
$answerLength = count($processedJSON->items->nodes);
|
||||
$offset = $offset + $answerLength;
|
||||
@@ -113,7 +119,16 @@ class ARDAudiothekBridge extends BridgeAbstract
|
||||
$item['timestamp'] = $audio->publicationStartDateAndTime;
|
||||
$item['uid'] = $audio->id;
|
||||
$item['author'] = $audio->programSet->publicationService->title;
|
||||
$item['categories'] = [ $audio->programSet->editorialCategories->title ];
|
||||
|
||||
$category = $audio->programSet->editorialCategories->title ?? null;
|
||||
if ($category) {
|
||||
$item['categories'] = [$category];
|
||||
}
|
||||
|
||||
$item['itunes'] = [
|
||||
'duration' => $audio->duration,
|
||||
];
|
||||
|
||||
$this->items[] = $item;
|
||||
}
|
||||
}
|
||||
|
@@ -20,17 +20,14 @@ class AcrimedBridge extends FeedExpander
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$this->collectExpandableDatas(
|
||||
static::URI . 'spip.php?page=backend',
|
||||
$this->getInput('limit')
|
||||
);
|
||||
$url = 'https://www.acrimed.org/spip.php?page=backend';
|
||||
$limit = $this->getInput('limit');
|
||||
$this->collectExpandableDatas($url, $limit);
|
||||
}
|
||||
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($newsItem);
|
||||
|
||||
$articlePage = getSimpleHTMLDOM($newsItem->link);
|
||||
$articlePage = getSimpleHTMLDOM($item['uri']);
|
||||
$article = sanitize($articlePage->find('article.article1', 0)->innertext);
|
||||
$article = defaultLinkTo($article, static::URI);
|
||||
$item['content'] = $article;
|
||||
|
@@ -35,10 +35,23 @@ class AllegroBridge extends BridgeAbstract
|
||||
|
||||
public function getName()
|
||||
{
|
||||
parse_str(parse_url($this->getInput('url'), PHP_URL_QUERY), $fields);
|
||||
$url = $this->getInput('url');
|
||||
if (!$url) {
|
||||
return parent::getName();
|
||||
}
|
||||
$parsedUrl = parse_url($url, PHP_URL_QUERY);
|
||||
if (!$parsedUrl) {
|
||||
return parent::getName();
|
||||
}
|
||||
parse_str($parsedUrl, $fields);
|
||||
|
||||
if ($query = array_key_exists('string', $fields) ? urldecode($fields['string']) : false) {
|
||||
return $query;
|
||||
if (array_key_exists('string', $fields)) {
|
||||
$f = urldecode($fields['string']);
|
||||
} else {
|
||||
$f = false;
|
||||
}
|
||||
if ($f) {
|
||||
return $f;
|
||||
}
|
||||
|
||||
return parent::getName();
|
||||
|
175
bridges/AnnasArchiveBridge.php
Normal file
175
bridges/AnnasArchiveBridge.php
Normal file
@@ -0,0 +1,175 @@
|
||||
<?php
|
||||
|
||||
/**
 * Bridge that exposes an Anna's Archive search (sorted newest first) as a feed.
 *
 * Every search hit becomes one item. The hit's own page is loaded as well so
 * that the cover image, the summary and the download links can be added.
 */
class AnnasArchiveBridge extends BridgeAbstract
{
    const NAME = 'Anna\'s Archive';
    const MAINTAINER = 'phantop';
    const URI = 'https://annas-archive.org/';
    const DESCRIPTION = 'Returns books from Anna\'s Archive';
    const PARAMETERS = [
        [
            'q' => [
                'name' => 'Query',
                'exampleValue' => 'apothecary diaries',
                'required' => true,
            ],
            'ext' => [
                'name' => 'Extension',
                'type' => 'list',
                'values' => [
                    'Any' => null,
                    'azw3' => 'azw3',
                    'cbr' => 'cbr',
                    'cbz' => 'cbz',
                    'djvu' => 'djvu',
                    'epub' => 'epub',
                    'fb2' => 'fb2',
                    'fb2.zip' => 'fb2.zip',
                    'mobi' => 'mobi',
                    'pdf' => 'pdf',
                ]
            ],
            'lang' => [
                'name' => 'Language',
                'type' => 'list',
                'values' => [
                    'Any' => null,
                    'Afrikaans [af]' => 'af',
                    'Arabic [ar]' => 'ar',
                    'Bangla [bn]' => 'bn',
                    'Belarusian [be]' => 'be',
                    'Bulgarian [bg]' => 'bg',
                    'Catalan [ca]' => 'ca',
                    'Chinese [zh]' => 'zh',
                    'Church Slavic [cu]' => 'cu',
                    'Croatian [hr]' => 'hr',
                    'Czech [cs]' => 'cs',
                    'Danish [da]' => 'da',
                    'Dongxiang [sce]' => 'sce',
                    'Dutch [nl]' => 'nl',
                    'English [en]' => 'en',
                    'French [fr]' => 'fr',
                    'German [de]' => 'de',
                    'Greek [el]' => 'el',
                    'Hebrew [he]' => 'he',
                    'Hindi [hi]' => 'hi',
                    'Hungarian [hu]' => 'hu',
                    'Indonesian [id]' => 'id',
                    'Irish [ga]' => 'ga',
                    'Italian [it]' => 'it',
                    'Japanese [ja]' => 'ja',
                    'Kazakh [kk]' => 'kk',
                    'Korean [ko]' => 'ko',
                    'Latin [la]' => 'la',
                    'Latvian [lv]' => 'lv',
                    'Lithuanian [lt]' => 'lt',
                    'Luxembourgish [lb]' => 'lb',
                    'Ndolo [ndl]' => 'ndl',
                    'Norwegian [no]' => 'no',
                    'Persian [fa]' => 'fa',
                    'Polish [pl]' => 'pl',
                    'Portuguese [pt]' => 'pt',
                    'Romanian [ro]' => 'ro',
                    'Russian [ru]' => 'ru',
                    'Serbian [sr]' => 'sr',
                    'Spanish [es]' => 'es',
                    'Swedish [sv]' => 'sv',
                    'Tamil [ta]' => 'ta',
                    'Traditional Chinese [zh‑Hant]' => 'zh‑Hant',
                    'Turkish [tr]' => 'tr',
                    'Ukrainian [uk]' => 'uk',
                    'Unknown language' => '_empty',
                    'Unknown language [und]' => 'und',
                    'Unknown language [urdu]' => 'urdu',
                    'Urdu [ur]' => 'ur',
                    'Vietnamese [vi]' => 'vi',
                    'Welsh [cy]' => 'cy',
                ]
            ],
            'content' => [
                'name' => 'Type',
                'type' => 'list',
                'values' => [
                    'Any' => null,
                    'Book (fiction)' => 'book_fiction',
                    'Book (non‑fiction)' => 'book_nonfiction',
                    'Book (unknown)' => 'book_unknown',
                    'Comic book' => 'book_comic',
                    'Journal article' => 'journal_article',
                    'Magazine' => 'magazine',
                    'Standards document' => 'standards_document',
                ]
            ],
            'src' => [
                'name' => 'Source',
                'type' => 'list',
                'values' => [
                    'Any' => null,
                    'Internet Archive' => 'ia',
                    'Libgen.li' => 'lgli',
                    'Libgen.rs' => 'lgrs',
                    'Sci‑Hub' => 'scihub',
                    'Z‑Library' => 'zlib',
                ]
            ],
        ]
    ];

    /**
     * Load the search result page and append one item per result to $this->items.
     *
     * Nothing is collected when the page contains the "not found" marker.
     */
    public function collectData()
    {
        $url = $this->getURI();
        $list = getSimpleHTMLDOMCached($url);
        $list = defaultLinkTo($list, self::URI);

        // Don't attempt to do anything if not found message is given
        if ($list->find('.js-not-found-additional')) {
            return;
        }

        foreach ($list->find('.w-full > .mb-4 > div > a') as $element) {
            $item = [];
            $item['title'] = $element->find('h3', 0)->plaintext;
            $item['author'] = $element->find('div.italic', 0)->plaintext;
            $item['uri'] = $element->href;
            $item['content'] = $element->plaintext;
            $item['uid'] = $item['uri'];

            if ($item_html = getSimpleHTMLDOMCached($item['uri'])) {
                $item_html = defaultLinkTo($item_html, self::URI);
                $item['content'] .= $item_html->find('main img', 0);
                $item['content'] .= $item_html->find('main .mt-4', 0); // Summary
                // Index -1 selects the last matching list on the page.
                if ($links = $item_html->find('main ul.mb-4', -1)) {
                    // Start from an empty list so array_diff() always receives an array,
                    // even when the list holds no download link.
                    $enclosures = [];
                    foreach ($links->find('li > a.js-download-link') as $file) {
                        $enclosures[] = $file->href;
                    }
                    // Remove bulk torrents from enclosures list; array_values() closes the
                    // key gaps array_diff() leaves behind.
                    $item['enclosures'] = array_values(array_diff($enclosures, [self::URI . 'datasets']));
                }
            }

            $this->items[] = $item;
        }
    }

    /**
     * Return the bridge name, suffixed with the search query when one is set.
     *
     * @return string
     */
    public function getName()
    {
        $name = parent::getName();
        if ($this->getInput('q') != null) {
            $name .= ' - ' . $this->getInput('q');
        }
        return $name;
    }

    /**
     * Build the search URL from the inputs that were provided.
     *
     * @return string
     */
    public function getURI()
    {
        $params = array_filter([ // Filter to remove non-provided parameters
            'q' => $this->getInput('q'),
            'ext' => $this->getInput('ext'),
            'lang' => $this->getInput('lang'),
            'src' => $this->getInput('src'),
            'content' => $this->getInput('content'),
        ]);
        $url = parent::getURI() . 'search?sort=newest&' . http_build_query($params);
        return $url;
    }
}
|
@@ -30,39 +30,39 @@ class ArsTechnicaBridge extends FeedExpander
|
||||
public function collectData()
|
||||
{
|
||||
$url = 'https://feeds.arstechnica.com/arstechnica/' . $this->getInput('section');
|
||||
$this->collectExpandableDatas($url);
|
||||
$this->collectExpandableDatas($url, 10);
|
||||
}
|
||||
|
||||
protected function parseItem($newItem)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($newItem);
|
||||
|
||||
$item_html = getSimpleHTMLDOMCached($item['uri'] . '&');
|
||||
$item_html = getSimpleHTMLDOMCached($item['uri']);
|
||||
$item_html = defaultLinkTo($item_html, self::URI);
|
||||
$item['content'] = $item_html->find('.amp-wp-article-content', 0);
|
||||
$item['content'] = $item_html->find('.article-content', 0);
|
||||
|
||||
$pages = $item_html->find('nav.page-numbers > .numbers > a', -2);
|
||||
if (null !== $pages) {
|
||||
for ($i = 2; $i <= $pages->innertext; $i++) {
|
||||
$page_url = $item['uri'] . '&page=' . $i;
|
||||
$page_html = getSimpleHTMLDOMCached($page_url);
|
||||
$page_html = defaultLinkTo($page_html, self::URI);
|
||||
$item['content'] .= $page_html->find('.article-content', 0);
|
||||
}
|
||||
$item['content'] = str_get_html($item['content']);
|
||||
}
|
||||
|
||||
// remove various ars advertising
|
||||
$item['content']->find('#social-left', 0)->remove();
|
||||
foreach ($item['content']->find('.ars-component-buy-box') as $ad) {
|
||||
$ad->remove();
|
||||
}
|
||||
foreach ($item['content']->find('i-amphtml-sizer') as $ad) {
|
||||
foreach ($item['content']->find('.ad_wrapper') as $ad) {
|
||||
$ad->remove();
|
||||
}
|
||||
foreach ($item['content']->find('.sidebar') as $ad) {
|
||||
$ad->remove();
|
||||
}
|
||||
|
||||
foreach ($item['content']->find('a') as $link) { //remove amp redirect links
|
||||
$url = $link->getAttribute('href');
|
||||
if (str_contains($url, 'go.redirectingat.com')) {
|
||||
$url = extractFromDelimiters($url, 'url=', '&');
|
||||
$url = urldecode($url);
|
||||
$link->setAttribute('href', $url);
|
||||
}
|
||||
}
|
||||
|
||||
$item['content'] = backgroundToImg(str_replace('data-amp-original-style="background-image', 'style="background-image', $item['content']));
|
||||
$item['content'] = backgroundToImg($item['content']);
|
||||
|
||||
$item['uid'] = explode('=', $item['uri'])[1];
|
||||
|
||||
|
@@ -156,6 +156,10 @@ class Arte7Bridge extends BridgeAbstract
|
||||
. $element['mainImage']['url']
|
||||
. '" /></a>';
|
||||
|
||||
$item['itunes'] = [
|
||||
'duration' => $durationSeconds,
|
||||
];
|
||||
|
||||
$this->items[] = $item;
|
||||
}
|
||||
}
|
||||
|
212
bridges/BMDSystemhausBlogBridge.php
Normal file
212
bridges/BMDSystemhausBlogBridge.php
Normal file
@@ -0,0 +1,212 @@
|
||||
<?php
|
||||
|
||||
class BMDSystemhausBlogBridge extends BridgeAbstract
|
||||
{
|
||||
const MAINTAINER = 'cn-tools';
|
||||
const NAME = 'BMD SYSTEMHAUS GesmbH';
|
||||
const CACHE_TIMEOUT = 21600; //6h
|
||||
const URI = 'https://www.bmd.com';
|
||||
const DONATION_URI = 'https://paypal.me/cntools';
|
||||
const DESCRIPTION = 'BMD Systemhaus - We make business easy';
|
||||
|
||||
const ITEMSTYLE = [
|
||||
'ilcr' => '<table width="100%"><tr><td style="vertical-align: top;">{data_img}</td><td style="vertical-align: top;">{data_content}</td></tr></table>',
|
||||
'clir' => '<table width="100%"><tr><td style="vertical-align: top;">{data_content}</td><td style="vertical-align: top;">{data_img}</td></tr></table>',
|
||||
'itcb' => '<div>{data_img}<br />{data_content}</div>',
|
||||
'ctib' => '<div>{data_content}<br />{data_img}</div>',
|
||||
'co' => '{data_content}',
|
||||
'io' => '{data_img}'
|
||||
];
|
||||
|
||||
const PARAMETERS = [
|
||||
'Blog' => [
|
||||
'country' => [
|
||||
'name' => 'Country',
|
||||
'type' => 'list',
|
||||
'values' => [
|
||||
'Österreich' => 'at',
|
||||
'Deutschland' => 'de',
|
||||
'Schweiz' => 'ch',
|
||||
'Slovensko' => 'sk',
|
||||
'Cesko' => 'cz',
|
||||
'Hungary' => 'hu',
|
||||
],
|
||||
'defaultValue' => 'at',
|
||||
],
|
||||
'style' => [
|
||||
'name' => 'Style',
|
||||
'type' => 'list',
|
||||
'values' => [
|
||||
'Image left, content right' => 'ilcr',
|
||||
'Content left, image right' => 'clir',
|
||||
'Image top, content bottom' => 'itcb',
|
||||
'Content top, image bottom' => 'ctib',
|
||||
'Content only' => 'co',
|
||||
'Image only' => 'io',
|
||||
],
|
||||
'defaultValue' => 'ilcr',
|
||||
]
|
||||
]
|
||||
];
|
||||
|
||||
//-----------------------------------------------------
|
||||
/**
 * Scrape the blog overview page returned by getURI() and append one feed
 * item per news entry to $this->items.
 *
 * Title, publication date, author and description are read from the
 * <meta itemprop="..."> tags of each entry; image, article link and
 * categories come from the surrounding markup. The item content is rendered
 * from the ITEMSTYLE template selected through the 'style' input.
 *
 * Entries without the metadata container or without an article link are
 * skipped, as are entries that end up with an empty title, content or uri.
 *
 * @return void
 */
public function collectData()
{
    // get website content
    $html = getSimpleHTMLDOM($this->getURI()) or returnServerError('No contents received!');

    // Convert relative links in HTML into absolute links
    $html = defaultLinkTo($html, self::URI);

    // Convert lazy-loading images and frames (video embeds) into static elements
    $html = convertLazyLoading($html);

    foreach ($html->find('div#bmdNewsList div#bmdNewsList-Item') as $element) {
        $itemScope = $element->find('div[itemscope=itemscope]', 0);
        $link = $element->find('div#bmdNewsList-Text div#bmdNewsList-Title a', 0);

        // Without the metadata container there is no title and without the
        // link there is no uri: such an entry can never be published, so skip
        // it here instead of dereferencing null further down.
        if (is_null($itemScope) || is_null($link)) {
            continue;
        }

        $item = [];

        // set base article data
        $item['title'] = $this->getMetaItemPropContent($itemScope, 'headline');
        $item['uri'] = $link->href;
        $item['author'] = $this->getMetaItemPropContent($itemScope->find('div[itemprop=author]', 0), 'name');

        // strtotime() yields false for a missing or unparsable date;
        // leave the timestamp unset in that case rather than storing false
        $timestamp = strtotime((string) $this->getMetaItemPropContent($itemScope, 'datePublished'));
        if ($timestamp !== false) {
            $item['timestamp'] = $timestamp;
        }

        // find article image
        $imageTag = '';
        $image = $element->find('div.mediaelement.mediaelement-image img', 0);
        if (!is_null($image) && $image->src != '') {
            $item['enclosures'] = [$image->src];
            $imageTag = '<img src="' . $image->src . '"/>';
        }

        // render the placeholders of the selected style template
        // (replacements are applied in the listed order, content first)
        $item['content'] = str_replace(
            ['{data_content}', '{data_img}'],
            [$this->getMetaItemPropContent($itemScope, 'description'), $imageTag],
            self::ITEMSTYLE[$this->getInput('style')]
        );

        // collect categories from the first category span, if there is one
        $categories = [];
        $catElem = $element->find('div#bmdNewsList-Text div#bmdNewsList-Category span.news-list-category', 0);
        if (!is_null($catElem)) {
            // category names are separated by '/', by runs of two or more
            // spaces, or by tabulators
            $parts = preg_split('#/| {2,}|\t+#', $catElem->innertext);
            if ($parts !== false) {
                // trim every entry, drop the empty ones and re-index the list
                $parts = array_map('trim', $parts);
                $categories = array_values(array_filter($parts, function ($value) {
                    return $value !== '';
                }));
            }
        }

        // set categories
        if (count($categories) > 0) {
            $item['categories'] = $categories;
        }

        // add item
        if ($item['title'] != '' && $item['content'] != '' && $item['uri'] != '') {
            $this->items[] = $item;
        }
    }
}
|
||||
|
||||
//-----------------------------------------------------
|
||||
/**
 * Derive the bridge parameters from a www.bmd.com URL.
 *
 * The first path segment is taken as the country code; it is accepted only
 * when getURIbyCountry() knows a blog page for it.
 *
 * @param string $url URL to inspect
 * @return array|null ['country' => code], or null when the URL cannot be
 *                    parsed, is not on www.bmd.com or names no supported country
 */
public function detectParameters($url)
{
    try {
        $parsedUrl = Url::fromString($url);
    } catch (UrlException $e) {
        return null;
    }

    if ($parsedUrl->getHost() != 'www.bmd.com') {
        return null;
    }

    // An empty path explodes to a single element, so index 1 may be missing;
    // fall back to '' which getURIbyCountry() rejects.
    $path = explode('/', (string) $parsedUrl->getPath());
    $country = $path[1] ?? '';

    if ($this->getURIbyCountry($country) == '') {
        return null;
    }

    return ['country' => $country];
}
|
||||
|
||||
//-----------------------------------------------------
|
||||
/**
 * Return the blog page of the country chosen through the 'country' input.
 *
 * @return string country specific blog URL, or the parent's URI when
 *                getURIbyCountry() has no page for the current input
 */
public function getURI()
{
    $countryUri = $this->getURIbyCountry($this->getInput('country'));

    if ($countryUri == '') {
        return parent::getURI();
    }

    return $countryUri;
}
|
||||
|
||||
//-----------------------------------------------------
|
||||
/**
 * Return the fixed favicon URL of www.bmd.com as the icon of this bridge.
 *
 * @return string absolute URL of the favicon
 */
public function getIcon()
|
||||
{
|
||||
return 'https://www.bmd.com/favicon.ico';
|
||||
}
|
||||
|
||||
//-----------------------------------------------------
|
||||
/**
 * Read the "content" attribute of the first <meta itemprop="$key"> tag
 * found below $elem.
 *
 * @param object|null $elem node to search in (must offer find()); may be null
 * @param string $key itemprop name to look up
 * @return mixed whatever getAttribute('content') returns for the tag, or ''
 *               when $key is empty, $elem is null or no such tag exists
 */
private function getMetaItemPropContent($elem, $key)
{
    // nothing to look up, or nowhere to look
    if ($key == '' || is_null($elem)) {
        return '';
    }

    $selector = 'meta[itemprop=' . $key . ']';
    $meta = $elem->find($selector, 0);

    return is_null($meta) ? '' : $meta->getAttribute('content');
}
|
||||
|
||||
//-----------------------------------------------------
|
||||
/**
 * Map a country code to the URL of that country's blog overview page.
 *
 * @param string $country country code ('at', 'de', 'ch', 'sk', 'cz' or 'hu')
 * @return string URL of the blog page, or '' for any other value
 */
private function getURIbyCountry($country)
{
    $blogPages = [
        'at' => 'https://www.bmd.com/at/ueber-bmd/blog-ohne-filter.html',
        'de' => 'https://www.bmd.com/de/das-ist-bmd/blog.html',
        'ch' => 'https://www.bmd.com/ch/das-ist-bmd/blog.html',
        'sk' => 'https://www.bmd.com/sk/firma/blog.html',
        'cz' => 'https://www.bmd.com/cz/firma/news-blog.html',
        'hu' => 'https://www.bmd.com/hu/rolunk/hirek.html',
    ];

    // Scan in declaration order with a loose comparison, i.e. exactly the
    // way a switch statement would test its case labels.
    foreach ($blogPages as $code => $uri) {
        if ($country == $code) {
            return $uri;
        }
    }

    return '';
}
|
||||
}
|
@@ -8,48 +8,27 @@ class BinanceBridge extends BridgeAbstract
|
||||
const MAINTAINER = 'thefranke';
|
||||
const CACHE_TIMEOUT = 3600; // 1h
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$url = 'https://www.binance.com/bapi/composite/v1/public/content/blog/list?category=&tag=&page=1&size=12';
|
||||
$json = getContents($url);
|
||||
$data = Json::decode($json, false);
|
||||
foreach ($data->data->blogList as $post) {
|
||||
$item = [];
|
||||
$item['title'] = $post->title;
|
||||
// Url slug not in json
|
||||
//$item['uri'] = $uri;
|
||||
$item['timestamp'] = $post->postTimeUTC / 1000;
|
||||
$item['author'] = 'Binance';
|
||||
$item['content'] = $post->brief;
|
||||
//$item['categories'] = $category;
|
||||
$item['uid'] = $post->idStr;
|
||||
$this->items[] = $item;
|
||||
}
|
||||
}
|
||||
|
||||
public function getIcon()
|
||||
{
|
||||
return 'https://bin.bnbstatic.com/static/images/common/favicon.ico';
|
||||
}
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$html = getSimpleHTMLDOM(self::URI)
|
||||
or returnServerError('Could not fetch Binance blog data.');
|
||||
|
||||
$appData = $html->find('script[id="__APP_DATA"]');
|
||||
$appDataJson = json_decode($appData[0]->innertext);
|
||||
$allposts = $appDataJson->routeProps->f3ac->blogListRes->list;
|
||||
|
||||
foreach ($allposts as $element) {
|
||||
$date = $element->releasedTime;
|
||||
$title = $element->title;
|
||||
$category = $element->category->name;
|
||||
|
||||
$suburl = strtolower($category);
|
||||
$suburl = str_replace(' ', '_', $suburl);
|
||||
|
||||
$uri = self::URI . '/' . $suburl . '/' . $element->idStr;
|
||||
|
||||
$contentHTML = getSimpleHTMLDOMCached($uri);
|
||||
$contentAppData = $contentHTML->find('script[id="__APP_DATA"]');
|
||||
$contentAppDataJson = json_decode($contentAppData[0]->innertext);
|
||||
$content = $contentAppDataJson->routeProps->a106->blogDetail->content;
|
||||
|
||||
$item = [];
|
||||
$item['title'] = $title;
|
||||
$item['uri'] = $uri;
|
||||
$item['timestamp'] = substr($date, 0, -3);
|
||||
$item['author'] = 'Binance';
|
||||
$item['content'] = $content;
|
||||
$item['categories'] = $category;
|
||||
|
||||
$this->items[] = $item;
|
||||
|
||||
if (count($this->items) >= 10) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -7,10 +7,14 @@ class BleepingComputerBridge extends FeedExpander
|
||||
const URI = 'https://www.bleepingcomputer.com/';
|
||||
const DESCRIPTION = 'Returns the newest articles.';
|
||||
|
||||
protected function parseItem($item)
|
||||
public function collectData()
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
$feed = static::URI . 'feed/';
|
||||
$this->collectExpandableDatas($feed);
|
||||
}
|
||||
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$article_html = getSimpleHTMLDOMCached($item['uri']);
|
||||
if (!$article_html) {
|
||||
$item['content'] .= '<p><em>Could not request ' . $this->getName() . ': ' . $item['uri'] . '</em></p>';
|
||||
@@ -23,10 +27,4 @@ class BleepingComputerBridge extends FeedExpander
|
||||
|
||||
return $item;
|
||||
}
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$feed = static::URI . 'feed/';
|
||||
$this->collectExpandableDatas($feed);
|
||||
}
|
||||
}
|
||||
|
28
bridges/BundesverbandFuerFreieKammernBridge.php
Normal file
28
bridges/BundesverbandFuerFreieKammernBridge.php
Normal file
@@ -0,0 +1,28 @@
|
||||
<?php
|
||||
|
||||
/**
 * Bridge for the news list ("Aktuelle Nachrichten") on www.bffk.de.
 *
 * The feed is described declaratively through the XPATH_EXPRESSION_*
 * constants (presumably evaluated by XPathAbstract); only the timestamp
 * needs custom parsing, see formatItemTimestamp().
 */
class BundesverbandFuerFreieKammernBridge extends XPathAbstract
{
    const NAME = 'Bundesverband für freie Kammern e.V.';
    const URI = 'https://www.bffk.de/aktuelles/aktuelle-nachrichten.html';
    const DESCRIPTION = 'Aktuelle Nachrichten';
    const MAINTAINER = 'hleskien';

    // The scraped page is the very page the bridge links to, so reuse URI
    // instead of repeating the literal.
    const FEED_SOURCE_URL = self::URI;
    //const XPATH_EXPRESSION_FEED_ICON = './/link[@rel="icon"]/@href';
    const XPATH_EXPRESSION_ITEM = '//ul[@class="article-list"]/li';
    // Title and content are both taken from the link text of the list entry.
    const XPATH_EXPRESSION_ITEM_TITLE = './/a/text()';
    const XPATH_EXPRESSION_ITEM_CONTENT = './/a/text()';
    const XPATH_EXPRESSION_ITEM_URI = './/a/@href';
    //const XPATH_EXPRESSION_ITEM_AUTHOR = './/';
    const XPATH_EXPRESSION_ITEM_TIMESTAMP = './/span/i';
    //const XPATH_EXPRESSION_ITEM_ENCLOSURES = './';
    //const XPATH_EXPRESSION_ITEM_CATEGORIES = './/';

    /**
     * Convert the date text of a list entry into a Unix timestamp.
     *
     * The text is expected as "dd.mm.yyyy", optionally wrapped in
     * parentheses and whitespace, and is interpreted as midnight of that day.
     *
     * @param string $value raw date text of the entry
     * @return int|false timestamp of the day at 00:00:00; when the text is not
     *                   in the expected format the result of strtotime() is
     *                   returned instead, which is false if that fails as well
     */
    protected function formatItemTimestamp($value)
    {
        // strip the surrounding parentheses together with any whitespace around them
        $value = trim((string) $value, "() \t\n\r\0\x0B");

        $dti = DateTimeImmutable::createFromFormat('d.m.Y', $value);
        if ($dti === false) {
            // createFromFormat() signals a parse failure with false; calling
            // setTime() on it would be a fatal error, so fall back to the
            // free-form parser instead.
            return strtotime($value);
        }

        // createFromFormat() fills unparsed fields with the current time,
        // so pin the time of day to midnight
        return $dti->setTime(0, 0, 0)->getTimestamp();
    }
}
|
@@ -1,6 +1,6 @@
|
||||
<?php
|
||||
|
||||
class CNETBridge extends BridgeAbstract
|
||||
class CNETBridge extends SitemapBridge
|
||||
{
|
||||
const MAINTAINER = 'ORelio';
|
||||
const NAME = 'CNET News';
|
||||
@@ -14,101 +14,101 @@ class CNETBridge extends BridgeAbstract
|
||||
'type' => 'list',
|
||||
'values' => [
|
||||
'All articles' => '',
|
||||
'Apple' => 'apple',
|
||||
'Google' => 'google',
|
||||
'Microsoft' => 'tags-microsoft',
|
||||
'Computers' => 'topics-computers',
|
||||
'Mobile' => 'topics-mobile',
|
||||
'Sci-Tech' => 'topics-sci-tech',
|
||||
'Security' => 'topics-security',
|
||||
'Internet' => 'topics-internet',
|
||||
'Tech Industry' => 'topics-tech-industry'
|
||||
'Tech' => 'tech',
|
||||
'Money' => 'personal-finance',
|
||||
'Home' => 'home',
|
||||
'Wellness' => 'health',
|
||||
'Energy' => 'home/energy-and-utilities',
|
||||
'Deals' => 'deals',
|
||||
'Computing' => 'tech/computing',
|
||||
'Mobile' => 'tech/mobile',
|
||||
'Science' => 'science',
|
||||
'Services' => 'tech/services-and-software'
|
||||
]
|
||||
]
|
||||
],
|
||||
'limit' => self::LIMIT
|
||||
]
|
||||
];
|
||||
|
||||
private function cleanArticle($article_html)
|
||||
{
|
||||
$offset_p = strpos($article_html, '<p>');
|
||||
$offset_figure = strpos($article_html, '<figure');
|
||||
$offset = ($offset_figure < $offset_p ? $offset_figure : $offset_p);
|
||||
$article_html = substr($article_html, $offset);
|
||||
$article_html = str_replace('href="/', 'href="' . self::URI, $article_html);
|
||||
$article_html = str_replace(' height="0"', '', $article_html);
|
||||
$article_html = str_replace('<noscript>', '', $article_html);
|
||||
$article_html = str_replace('</noscript>', '', $article_html);
|
||||
$article_html = StripWithDelimiters($article_html, '<a class="clickToEnlarge', '</a>');
|
||||
$article_html = stripWithDelimiters($article_html, '<span class="nowPlaying', '</span>');
|
||||
$article_html = stripWithDelimiters($article_html, '<span class="duration', '</span>');
|
||||
$article_html = stripWithDelimiters($article_html, '<script', '</script>');
|
||||
$article_html = stripWithDelimiters($article_html, '<svg', '</svg>');
|
||||
return $article_html;
|
||||
}
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
// Retrieve and check user input
|
||||
$topic = str_replace('-', '/', $this->getInput('topic'));
|
||||
if (!empty($topic) && (substr_count($topic, '/') > 1 || !ctype_alpha(str_replace('/', '', $topic)))) {
|
||||
returnClientError('Invalid topic: ' . $topic);
|
||||
$topic = $this->getInput('topic');
|
||||
$limit = $this->getInput('limit');
|
||||
$limit = empty($limit) ? 10 : $limit;
|
||||
|
||||
$url_pattern = empty($topic) ? '' : self::URI . $topic;
|
||||
$sitemap_latest = self::URI . 'sitemaps/article/' . date('Y/m') . '.xml';
|
||||
$sitemap_previous = self::URI . 'sitemaps/article/' . date('Y/m', strtotime('last day of previous month')) . '.xml';
|
||||
|
||||
$links = array_merge(
|
||||
$this->sitemapXmlToList($this->getSitemapXml($sitemap_latest, true), $url_pattern, $limit),
|
||||
$this->sitemapXmlToList($this->getSitemapXml($sitemap_previous, true), $url_pattern, $limit)
|
||||
);
|
||||
|
||||
if ($limit > 0 && count($links) > $limit) {
|
||||
$links = array_slice($links, 0, $limit);
|
||||
}
|
||||
|
||||
// Retrieve webpage
|
||||
$pageUrl = self::URI . (empty($topic) ? 'news/' : $topic . '/');
|
||||
$html = getSimpleHTMLDOM($pageUrl);
|
||||
if (empty($links)) {
|
||||
returnClientError('Failed to retrieve article list');
|
||||
}
|
||||
|
||||
// Process articles
|
||||
foreach ($html->find('div.assetBody, div.riverPost') as $element) {
|
||||
if (count($this->items) >= 10) {
|
||||
break;
|
||||
}
|
||||
foreach ($links as $article_uri) {
|
||||
$article_dom = convertLazyLoading(getSimpleHTMLDOMCached($article_uri));
|
||||
$title = trim($article_dom->find('h1', 0)->plaintext);
|
||||
$author = $article_dom->find('span.c-assetAuthor_name', 0)->plaintext;
|
||||
$headline = $article_dom->find('p.c-contentHeader_description', 0);
|
||||
$content = $article_dom->find('div.c-pageArticle_content, div.single-article__content, div.article-main-body', 0);
|
||||
$date = null;
|
||||
$enclosure = null;
|
||||
|
||||
$article_title = trim($element->find('h2, h3', 0)->plaintext);
|
||||
$article_uri = self::URI . substr($element->find('a', 0)->href, 1);
|
||||
$article_thumbnail = $element->parent()->find('img[src]', 0)->src;
|
||||
$article_timestamp = strtotime($element->find('time.assetTime, div.timeAgo', 0)->plaintext);
|
||||
$article_author = trim($element->find('a[rel=author], a.name', 0)->plaintext);
|
||||
$article_content = '<p><b>' . trim($element->find('p.dek', 0)->plaintext) . '</b></p>';
|
||||
|
||||
if (is_null($article_thumbnail)) {
|
||||
$article_thumbnail = extractFromDelimiters($element->innertext, '<img src="', '"');
|
||||
}
|
||||
|
||||
if (!empty($article_title) && !empty($article_uri) && strpos($article_uri, self::URI . 'news/') !== false) {
|
||||
$article_html = getSimpleHTMLDOMCached($article_uri) or $article_html = null;
|
||||
|
||||
if (!is_null($article_html)) {
|
||||
if (empty($article_thumbnail)) {
|
||||
$article_thumbnail = $article_html->find('div.originalImage', 0);
|
||||
}
|
||||
if (empty($article_thumbnail)) {
|
||||
$article_thumbnail = $article_html->find('span.imageContainer', 0);
|
||||
}
|
||||
if (is_object($article_thumbnail)) {
|
||||
$article_thumbnail = $article_thumbnail->find('img', 0)->src;
|
||||
}
|
||||
|
||||
$article_content .= trim(
|
||||
$this->cleanArticle(
|
||||
extractFromDelimiters(
|
||||
$article_html,
|
||||
'<article',
|
||||
'<footer'
|
||||
)
|
||||
)
|
||||
);
|
||||
foreach ($article_dom->find('script[type=application/ld+json]') as $ldjson) {
|
||||
$datePublished = extractFromDelimiters($ldjson->innertext, '"datePublished":"', '"');
|
||||
if ($datePublished !== false) {
|
||||
$date = strtotime($datePublished);
|
||||
}
|
||||
$imageObject = extractFromDelimiters($ldjson->innertext, 'ImageObject","url":"', '"');
|
||||
if ($imageObject !== false) {
|
||||
$enclosure = $imageObject;
|
||||
}
|
||||
|
||||
$item = [];
|
||||
$item['uri'] = $article_uri;
|
||||
$item['title'] = $article_title;
|
||||
$item['author'] = $article_author;
|
||||
$item['timestamp'] = $article_timestamp;
|
||||
$item['enclosures'] = [$article_thumbnail];
|
||||
$item['content'] = $article_content;
|
||||
$this->items[] = $item;
|
||||
}
|
||||
|
||||
foreach ($content->find('div.c-shortcodeGallery') as $cleanup) {
|
||||
$cleanup->outertext = '';
|
||||
}
|
||||
|
||||
foreach ($content->find('figure') as $figure) {
|
||||
$img = $figure->find('img', 0);
|
||||
if ($img) {
|
||||
$figure->outertext = $img->outertext;
|
||||
}
|
||||
}
|
||||
|
||||
$content = $content->innertext;
|
||||
|
||||
if ($enclosure) {
|
||||
$content = "<div><img src=\"$enclosure\" /></div>" . $content;
|
||||
}
|
||||
|
||||
if ($headline) {
|
||||
$content = '<p><b>' . $headline->plaintext . '</b></p><br />' . $content;
|
||||
}
|
||||
|
||||
$item = [];
|
||||
$item['uri'] = $article_uri;
|
||||
$item['title'] = $title;
|
||||
$item['author'] = $author;
|
||||
$item['content'] = $content;
|
||||
|
||||
if (!is_null($date)) {
|
||||
$item['timestamp'] = $date;
|
||||
}
|
||||
|
||||
if (!is_null($enclosure)) {
|
||||
$item['enclosures'] = [$enclosure];
|
||||
}
|
||||
|
||||
$this->items[] = $item;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -43,10 +43,8 @@ class CNETFranceBridge extends FeedExpander
|
||||
$this->collectExpandableDatas('https://www.cnetfrance.fr/feeds/rss/news/');
|
||||
}
|
||||
|
||||
protected function parseItem($feedItem)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($feedItem);
|
||||
|
||||
foreach ($this->bannedTitle as $term) {
|
||||
if (preg_match('/' . $term . '/mi', $item['title']) === 1) {
|
||||
return null;
|
||||
@@ -54,7 +52,7 @@ class CNETFranceBridge extends FeedExpander
|
||||
}
|
||||
|
||||
foreach ($this->bannedURL as $term) {
|
||||
if (preg_match('/' . $term . '/mi', $item['uri']) === 1) {
|
||||
if (preg_match('#' . $term . '#mi', $item['uri'])) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
@@ -36,12 +36,65 @@ class CVEDetailsBridge extends BridgeAbstract
|
||||
private $vendor = '';
|
||||
private $product = '';
|
||||
|
||||
// Return the URL to query.
|
||||
// Because of the optional product ID, we need to attach it if it is
|
||||
// set. The search result page has the exact same structure (with and
|
||||
// without the product ID).
|
||||
private function buildUrl()
|
||||
public function collectData()
|
||||
{
|
||||
if ($this->html == null) {
|
||||
$this->fetchContent();
|
||||
}
|
||||
|
||||
foreach ($this->html->find('#searchresults > .row') as $i => $tr) {
|
||||
// There are some optional vulnerability types, which will be
|
||||
// added to the categories as well as the CWE number -- which is
|
||||
// always given.
|
||||
$categories = [$this->vendor];
|
||||
$enclosures = [];
|
||||
|
||||
$detailLink = $tr->find('h3 > a', 0);
|
||||
$detailHtml = getSimpleHTMLDOM($detailLink->href);
|
||||
|
||||
// The CVE number itself
|
||||
$title = $tr->find('h3 > a', 0)->innertext;
|
||||
$content = $tr->find('.cvesummarylong', 0)->innertext;
|
||||
$cweList = $detailHtml->find('h2', 2)->next_sibling();
|
||||
foreach ($cweList->find('li') as $li) {
|
||||
$cweWithDescription = $li->find('a', 0)->innertext ?? '';
|
||||
|
||||
if (preg_match('/CWE-(\d+)/', $cweWithDescription, $cwe)) {
|
||||
$categories[] = 'CWE-' . $cwe[1];
|
||||
$enclosures[] = 'https://cwe.mitre.org/data/definitions/' . $cwe[1] . '.html';
|
||||
}
|
||||
}
|
||||
|
||||
if ($this->product != '') {
|
||||
$categories[] = $this->product;
|
||||
}
|
||||
|
||||
$this->items[] = [
|
||||
'uri' => 'https://cvedetails.com/' . $detailHtml->find('h1 > a', 0)->href,
|
||||
'title' => $title,
|
||||
'timestamp' => $tr->find('[data-tsvfield="publishDate"]', 0)->innertext,
|
||||
'content' => $content,
|
||||
'categories' => $categories,
|
||||
'enclosures' => $enclosures,
|
||||
'uid' => $title,
|
||||
];
|
||||
|
||||
// We only want to fetch the latest 10 CVEs
|
||||
if (count($this->items) >= 10) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Make the actual request to cvedetails.com and stores the response
|
||||
// (HTML) for later use and extract vendor and product from it.
|
||||
private function fetchContent()
|
||||
{
|
||||
// build url
|
||||
// Return the URL to query.
|
||||
// Because of the optional product ID, we need to attach it if it is
|
||||
// set. The search result page has the exact same structure (with and
|
||||
// without the product ID).
|
||||
$url = self::URI . '/vulnerability-list/vendor_id-' . $this->getInput('vendor_id');
|
||||
if ($this->getInput('product_id') !== '') {
|
||||
$url .= '/product_id-' . $this->getInput('product_id');
|
||||
@@ -51,22 +104,12 @@ class CVEDetailsBridge extends BridgeAbstract
|
||||
// number, which should be mostly accurate.
|
||||
$url .= '?order=1'; // Order by CVE number DESC
|
||||
|
||||
return $url;
|
||||
}
|
||||
|
||||
// Make the actual request to cvedetails.com and stores the response
|
||||
// (HTML) for later use and extract vendor and product from it.
|
||||
private function fetchContent()
|
||||
{
|
||||
$html = getSimpleHTMLDOM($this->buildUrl());
|
||||
$html = getSimpleHTMLDOM($url);
|
||||
$this->html = defaultLinkTo($html, self::URI);
|
||||
|
||||
$vendor = $html->find('#contentdiv h1 > a', 0);
|
||||
if ($vendor == null) {
|
||||
returnServerError('Invalid Vendor ID ' .
|
||||
$this->getInput('vendor_id') .
|
||||
' or Product ID ' .
|
||||
$this->getInput('product_id'));
|
||||
returnServerError('Invalid Vendor ID ' . $this->getInput('vendor_id') . ' or Product ID ' . $this->getInput('product_id'));
|
||||
}
|
||||
$this->vendor = $vendor->innertext;
|
||||
|
||||
@@ -76,7 +119,6 @@ class CVEDetailsBridge extends BridgeAbstract
|
||||
}
|
||||
}
|
||||
|
||||
// Build the name of the feed.
|
||||
public function getName()
|
||||
{
|
||||
if ($this->getInput('vendor_id') == '') {
|
||||
@@ -94,57 +136,4 @@ class CVEDetailsBridge extends BridgeAbstract
|
||||
|
||||
return $name;
|
||||
}
|
||||
|
||||
// Pull the data from the HTML response and fill the items..
|
||||
public function collectData()
|
||||
{
|
||||
if ($this->html == null) {
|
||||
$this->fetchContent();
|
||||
}
|
||||
|
||||
foreach ($this->html->find('#searchresults > .row') as $i => $tr) {
|
||||
// There are some optional vulnerability types, which will be
|
||||
// added to the categories as well as the CWE number -- which is
|
||||
// always given.
|
||||
$categories = [$this->vendor];
|
||||
$enclosures = [];
|
||||
|
||||
$detailLink = $tr->find('.cveheader > h3 > a', 0);
|
||||
$detailHtml = getSimpleHTMLDOM($detailLink->href);
|
||||
|
||||
$div = $detailHtml->find('.cvedetailssummary', 0);
|
||||
|
||||
// The CVE number itself
|
||||
$title = $div->find('h1 > a', 0)->innertext;
|
||||
$content = $div->find('.ssc-paragraph', 0)->innertext;
|
||||
$cweList = $detailHtml->find('h2', 2)->next_sibling();
|
||||
foreach ($cweList->find('li') as $li) {
|
||||
$cweWithDescription = $li->find('a', 0)->innertext;
|
||||
preg_match('/CWE-(\d+)/', $cweWithDescription, $cwe);
|
||||
if (count($cwe) > 1) {
|
||||
$categories[] = 'CWE-' . $cwe[1];
|
||||
$enclosures[] = 'https://cwe.mitre.org/data/definitions/' . $cwe[1] . '.html';
|
||||
}
|
||||
}
|
||||
|
||||
if ($this->product != '') {
|
||||
$categories[] = $this->product;
|
||||
}
|
||||
|
||||
$this->items[] = [
|
||||
'uri' => 'https://cvedetails.com/' . $detailHtml->find('h1 > a', 0)->href,
|
||||
'title' => $title,
|
||||
'timestamp' => $tr->find('td', 5)->innertext,
|
||||
'content' => $content,
|
||||
'categories' => $categories,
|
||||
'enclosures' => $enclosures,
|
||||
'uid' => $title,
|
||||
];
|
||||
|
||||
// We only want to fetch the latest 10 CVEs
|
||||
if (count($this->items) >= 10) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -9,8 +9,7 @@ class CarThrottleBridge extends BridgeAbstract
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$news = getSimpleHTMLDOMCached(self::URI . 'news')
|
||||
or returnServerError('could not retrieve page');
|
||||
$news = getSimpleHTMLDOMCached(self::URI . 'news');
|
||||
|
||||
$this->items[] = [];
|
||||
|
||||
@@ -22,23 +21,30 @@ class CarThrottleBridge extends BridgeAbstract
|
||||
$item['uri'] = self::URI . $titleElement->getAttribute('href');
|
||||
$item['title'] = $titleElement->innertext;
|
||||
|
||||
$articlePage = getSimpleHTMLDOMCached($item['uri'])
|
||||
or returnServerError('could not retrieve page');
|
||||
$articlePage = getSimpleHTMLDOMCached($item['uri']);
|
||||
|
||||
$item['author'] = $articlePage->find('div.author div')[1]->innertext;
|
||||
|
||||
$dinges = $articlePage->find('div.main-body')[0];
|
||||
//remove ads
|
||||
foreach ($dinges->find('aside') as $ad) {
|
||||
$ad->outertext = '';
|
||||
$dinges->save();
|
||||
$authorDiv = $articlePage->find('div.author div');
|
||||
if ($authorDiv) {
|
||||
$item['author'] = $authorDiv[1]->innertext;
|
||||
}
|
||||
|
||||
$item['content'] = $articlePage->find('div.summary')[0] .
|
||||
$articlePage->find('figure.main-image')[0] .
|
||||
$dinges;
|
||||
$dinges = $articlePage->find('div.main-body')[0] ?? null;
|
||||
//remove ads
|
||||
if ($dinges) {
|
||||
foreach ($dinges->find('aside') as $ad) {
|
||||
$ad->outertext = '';
|
||||
$dinges->save();
|
||||
}
|
||||
}
|
||||
|
||||
$var = $articlePage->find('div.summary')[0] ?? '';
|
||||
$var1 = $articlePage->find('figure.main-image')[0] ?? '';
|
||||
$dinges1 = $dinges ?? '';
|
||||
|
||||
$item['content'] = $var .
|
||||
$var1 .
|
||||
$dinges1;
|
||||
|
||||
//add the item to the list
|
||||
array_push($this->items, $item);
|
||||
}
|
||||
}
|
||||
|
@@ -34,10 +34,8 @@ class CaschyBridge extends FeedExpander
|
||||
);
|
||||
}
|
||||
|
||||
protected function parseItem($feedItem)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($feedItem);
|
||||
|
||||
if (strpos($item['uri'], 'https://stadt-bremerhaven.de/') !== 0) {
|
||||
return $item;
|
||||
}
|
||||
|
@@ -57,9 +57,9 @@ class CeskaTelevizeBridge extends BridgeAbstract
|
||||
$this->feedName .= " ({$category})";
|
||||
}
|
||||
|
||||
foreach ($html->find('#episodeListSection a[data-testid=next-link]') as $element) {
|
||||
foreach ($html->find('#episodeListSection a[data-testid=card]') as $element) {
|
||||
$itemTitle = $element->find('h3', 0);
|
||||
$itemContent = $element->find('div[class^=content-]', 0);
|
||||
$itemContent = $element->find('p[class^=content-]', 0);
|
||||
$itemDate = $element->find('div[class^=playTime-] span', 0);
|
||||
$itemThumbnail = $element->find('img', 0);
|
||||
$itemUri = self::URI . $element->getAttribute('href');
|
||||
|
@@ -79,9 +79,9 @@ class CodebergBridge extends BridgeAbstract
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$html = getSimpleHTMLDOM($this->getURI());
|
||||
|
||||
$html = defaultLinkTo($html, $this->getURI());
|
||||
$url = $this->getURI();
|
||||
$html = getSimpleHTMLDOM($url);
|
||||
$html = defaultLinkTo($html, $url);
|
||||
|
||||
switch ($this->queriedContext) {
|
||||
case 'Commits':
|
||||
@@ -205,22 +205,22 @@ class CodebergBridge extends BridgeAbstract
|
||||
*/
|
||||
private function extractIssues($html)
|
||||
{
|
||||
$div = $html->find('div.issue.list', 0);
|
||||
$issueList = $html->find('div#issue-list', 0);
|
||||
|
||||
foreach ($div->find('li.item') as $li) {
|
||||
foreach ($issueList->find('div.flex-item') as $div) {
|
||||
$item = [];
|
||||
|
||||
$number = trim($li->find('a.index,ml-0.mr-2', 0)->plaintext);
|
||||
$number = trim($div->find('a.index,ml-0.mr-2', 0)->plaintext);
|
||||
|
||||
$item['title'] = $li->find('a.title', 0)->plaintext . ' (' . $number . ')';
|
||||
$item['uri'] = $li->find('a.title', 0)->href;
|
||||
$item['title'] = $div->find('a.issue-title', 0)->plaintext . ' (' . $number . ')';
|
||||
$item['uri'] = $div->find('a.issue-title', 0)->href;
|
||||
|
||||
$time = $li->find('relative-time.time-since', 0);
|
||||
$time = $div->find('relative-time.time-since', 0);
|
||||
if ($time) {
|
||||
$item['timestamp'] = $time->datetime;
|
||||
}
|
||||
|
||||
$item['author'] = $li->find('div.desc', 0)->find('a', 1)->plaintext;
|
||||
//$item['author'] = $li->find('div.desc', 0)->find('a', 1)->plaintext;
|
||||
|
||||
// Fetch issue page
|
||||
$issuePage = getSimpleHTMLDOMCached($item['uri'], 3600);
|
||||
@@ -228,7 +228,7 @@ class CodebergBridge extends BridgeAbstract
|
||||
|
||||
$item['content'] = $issuePage->find('div.timeline-item.comment.first', 0)->find('div.render-content.markup', 0);
|
||||
|
||||
foreach ($li->find('a.ui.label') as $label) {
|
||||
foreach ($div->find('a.ui.label') as $label) {
|
||||
$item['categories'][] = $label->plaintext;
|
||||
}
|
||||
|
||||
@@ -260,7 +260,11 @@ class CodebergBridge extends BridgeAbstract
|
||||
}
|
||||
|
||||
$item['author'] = $div->find('a.author', 0)->innertext;
|
||||
$item['timestamp'] = $div->find('span.time-since', 0)->title;
|
||||
|
||||
$timeSince = $div->find('span.time-since', 0);
|
||||
if ($timeSince) {
|
||||
$item['timestamp'] = $timeSince->title;
|
||||
}
|
||||
|
||||
$this->items[] = $item;
|
||||
}
|
||||
@@ -271,22 +275,26 @@ class CodebergBridge extends BridgeAbstract
|
||||
*/
|
||||
private function extractPulls($html)
|
||||
{
|
||||
$div = $html->find('div.issue.list', 0);
|
||||
$div = $html->find('div#issue-list', 0);
|
||||
|
||||
foreach ($div->find('li.item') as $li) {
|
||||
$var2 = $div->find('div.flex-item');
|
||||
foreach ($var2 as $li) {
|
||||
$item = [];
|
||||
|
||||
$number = trim($li->find('a.index,ml-0.mr-2', 0)->plaintext);
|
||||
|
||||
$item['title'] = $li->find('a.title', 0)->plaintext . ' (' . $number . ')';
|
||||
$item['uri'] = $li->find('a.title', 0)->href;
|
||||
$a = $li->find('a.issue-title', 0);
|
||||
$item['title'] = $a->plaintext . ' (' . $number . ')';
|
||||
$item['uri'] = $a->href;
|
||||
|
||||
$time = $li->find('relative-time.time-since', 0);
|
||||
if ($time) {
|
||||
$item['timestamp'] = $time->datetime;
|
||||
}
|
||||
|
||||
$item['author'] = $li->find('div.desc', 0)->find('a', 1)->plaintext;
|
||||
// Extracting the author is a bit awkward after they changed their html
|
||||
//$desc = $li->find('div.desc', 0);
|
||||
//$item['author'] = $desc->find('a', 1)->plaintext;
|
||||
|
||||
// Fetch pull request page
|
||||
$pullRequestPage = getSimpleHTMLDOMCached($item['uri'], 3600);
|
||||
@@ -399,6 +407,9 @@ EOD;
|
||||
*/
|
||||
private function stripSvg($html)
|
||||
{
|
||||
if ($html === null) {
|
||||
return null;
|
||||
}
|
||||
if ($html->find('svg', 0)) {
|
||||
$html->find('svg', 0)->outertext = '';
|
||||
}
|
||||
|
@@ -12,9 +12,8 @@ class CommonDreamsBridge extends FeedExpander
|
||||
$this->collectExpandableDatas('http://www.commondreams.org/rss.xml', 10);
|
||||
}
|
||||
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($newsItem);
|
||||
$item['content'] = $this->extractContent($item['uri']);
|
||||
return $item;
|
||||
}
|
||||
|
@@ -13,11 +13,9 @@ class CourrierInternationalBridge extends FeedExpander
|
||||
$this->collectExpandableDatas(static::URI . 'feed/all/rss.xml', 20);
|
||||
}
|
||||
|
||||
protected function parseItem($feedItem)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($feedItem);
|
||||
|
||||
$articlePage = getSimpleHTMLDOMCached($feedItem->link);
|
||||
$articlePage = getSimpleHTMLDOMCached($item['uri']);
|
||||
$content = $articlePage->find('.article-text, depeche-text', 0);
|
||||
if (!$content) {
|
||||
return $item;
|
||||
|
@@ -51,15 +51,26 @@ class CssSelectorBridge extends BridgeAbstract
|
||||
EOT,
|
||||
'exampleValue' => ' | BlogName',
|
||||
],
|
||||
'discard_thumbnail' => [
|
||||
'name' => '[Optional] Discard thumbnail set by site author',
|
||||
'title' => 'Some sites set their logo as thumbnail for every article. Use this option to discard it.',
|
||||
'type' => 'checkbox',
|
||||
],
|
||||
'thumbnail_as_header' => [
|
||||
'name' => '[Optional] Insert thumbnail as article header',
|
||||
'title' => 'Insert article main image on top of article contents.',
|
||||
'type' => 'checkbox',
|
||||
],
|
||||
'limit' => self::LIMIT
|
||||
]
|
||||
];
|
||||
|
||||
private $feedName = '';
|
||||
protected $feedName = '';
|
||||
protected $homepageUrl = '';
|
||||
|
||||
public function getURI()
|
||||
{
|
||||
$url = $this->getInput('home_page');
|
||||
$url = $this->homepageUrl;
|
||||
if (empty($url)) {
|
||||
$url = parent::getURI();
|
||||
}
|
||||
@@ -76,29 +87,38 @@ class CssSelectorBridge extends BridgeAbstract
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$url = $this->getInput('home_page');
|
||||
$this->homepageUrl = $this->getInput('home_page');
|
||||
$url_selector = $this->getInput('url_selector');
|
||||
$url_pattern = $this->getInput('url_pattern');
|
||||
$content_selector = $this->getInput('content_selector');
|
||||
$content_cleanup = $this->getInput('content_cleanup');
|
||||
$title_cleanup = $this->getInput('title_cleanup');
|
||||
$discard_thumbnail = $this->getInput('discard_thumbnail');
|
||||
$thumbnail_as_header = $this->getInput('thumbnail_as_header');
|
||||
$limit = $this->getInput('limit') ?? 10;
|
||||
|
||||
$html = defaultLinkTo(getSimpleHTMLDOM($url), $url);
|
||||
$this->feedName = $this->getPageTitle($html, $title_cleanup);
|
||||
$html = defaultLinkTo(getSimpleHTMLDOM($this->homepageUrl), $this->homepageUrl);
|
||||
$this->feedName = $this->titleCleanup($this->getPageTitle($html), $title_cleanup);
|
||||
$items = $this->htmlFindEntries($html, $url_selector, $url_pattern, $limit, $content_cleanup);
|
||||
|
||||
if (empty($content_selector)) {
|
||||
$this->items = $items;
|
||||
} else {
|
||||
foreach ($items as $item) {
|
||||
$this->items[] = $this->expandEntryWithSelector(
|
||||
$item = $this->expandEntryWithSelector(
|
||||
$item['uri'],
|
||||
$content_selector,
|
||||
$content_cleanup,
|
||||
$title_cleanup,
|
||||
$item['title']
|
||||
);
|
||||
if ($discard_thumbnail && isset($item['enclosures'])) {
|
||||
unset($item['enclosures']);
|
||||
}
|
||||
if ($thumbnail_as_header && isset($item['enclosures'][0])) {
|
||||
$item['content'] = '<p><img src="' . $item['enclosures'][0] . '" /></p>' . $item['content'];
|
||||
}
|
||||
$this->items[] = $item;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -129,17 +149,27 @@ class CssSelectorBridge extends BridgeAbstract
|
||||
/**
|
||||
* Retrieve title from webpage URL or DOM
|
||||
* @param string|object $page URL or DOM to retrieve title from
|
||||
* @param string $title_cleanup optional string to remove from webpage title, e.g. " | BlogName"
|
||||
* @return string Webpage title
|
||||
*/
|
||||
protected function getPageTitle($page, $title_cleanup = null)
|
||||
protected function getPageTitle($page)
|
||||
{
|
||||
if (is_string($page)) {
|
||||
$page = getSimpleHTMLDOMCached($page);
|
||||
}
|
||||
$title = html_entity_decode($page->find('title', 0)->plaintext);
|
||||
if (!empty($title)) {
|
||||
$title = trim(str_replace($title_cleanup, '', $title));
|
||||
return $title;
|
||||
}
|
||||
|
||||
/**
|
||||
* Clean Article title. Remove constant part that appears in every title such as blog name.
|
||||
* @param string $title Title to clean, e.g. "Article Name | BlogName"
|
||||
* @param string $title_cleanup string to remove from webpage title, e.g. " | BlogName"
|
||||
* @return string Cleaned Title
|
||||
*/
|
||||
protected function titleCleanup($title, $title_cleanup)
|
||||
{
|
||||
if (!empty($title) && !empty($title_cleanup)) {
|
||||
return trim(str_replace($title_cleanup, '', $title));
|
||||
}
|
||||
return $title;
|
||||
}
|
||||
@@ -246,27 +276,34 @@ class CssSelectorBridge extends BridgeAbstract
|
||||
}
|
||||
|
||||
$entry_html = getSimpleHTMLDOMCached($entry_url);
|
||||
$item = html_find_seo_metadata($entry_html);
|
||||
|
||||
if (empty($item['uri'])) {
|
||||
$item['uri'] = $entry_url;
|
||||
}
|
||||
|
||||
if (empty($item['title'])) {
|
||||
$article_title = $this->getPageTitle($entry_html, $title_cleanup);
|
||||
if (!empty($title_default) && (empty($article_title) || $article_title === $this->feedName)) {
|
||||
$article_title = $title_default;
|
||||
}
|
||||
$item['title'] = $article_title;
|
||||
}
|
||||
|
||||
$item['title'] = $this->titleCleanup($item['title'], $title_cleanup);
|
||||
|
||||
$article_content = $entry_html->find($content_selector);
|
||||
|
||||
if (!empty($article_content)) {
|
||||
$article_content = $article_content[0];
|
||||
} else {
|
||||
returnClientError('Could not find content selector at URL: ' . $entry_url);
|
||||
$article_content = convertLazyLoading($article_content);
|
||||
$article_content = defaultLinkTo($article_content, $entry_url);
|
||||
$article_content = $this->cleanArticleContent($article_content, $content_cleanup);
|
||||
$item['content'] = $article_content;
|
||||
} else if (!empty($item['content'])) {
|
||||
$item['content'] .= '<br /><p><em>Could not extract full content, selector may need to be updated.</em></p>';
|
||||
}
|
||||
|
||||
$article_content = convertLazyLoading($article_content);
|
||||
$article_content = defaultLinkTo($article_content, $entry_url);
|
||||
$article_content = $this->cleanArticleContent($article_content, $content_cleanup);
|
||||
|
||||
$article_title = $this->getPageTitle($entry_html, $title_cleanup);
|
||||
if (!empty($title_default) && (empty($article_title) || $article_title === $this->feedName)) {
|
||||
$article_title = $title_default;
|
||||
}
|
||||
|
||||
$item = [];
|
||||
$item['uri'] = $entry_url;
|
||||
$item['title'] = $article_title;
|
||||
$item['content'] = $article_content;
|
||||
return $item;
|
||||
}
|
||||
}
|
||||
|
@@ -224,7 +224,7 @@ class CssSelectorComplexBridge extends BridgeAbstract
|
||||
{
|
||||
if (!empty($url_pattern)) {
|
||||
$url_pattern = '/' . str_replace('/', '\/', $url_pattern) . '/';
|
||||
$links = array_filter($links, function ($url) {
|
||||
$links = array_filter($links, function ($url) use ($url_pattern) {
|
||||
return preg_match($url_pattern, $url) === 1;
|
||||
});
|
||||
}
|
||||
@@ -245,7 +245,7 @@ class CssSelectorComplexBridge extends BridgeAbstract
|
||||
protected function getTitle($page, $title_cleanup)
|
||||
{
|
||||
if (is_string($page)) {
|
||||
$page = getSimpleHTMLDOMCached($page);
|
||||
$page = getSimpleHTMLDOMCached($page, 86400, $this->getHeaders());
|
||||
}
|
||||
$title = html_entity_decode($page->find('title', 0)->plaintext);
|
||||
if (!empty($title)) {
|
||||
@@ -302,7 +302,7 @@ class CssSelectorComplexBridge extends BridgeAbstract
|
||||
protected function htmlFindEntryElements($page, $entry_selector, $url_selector, $url_pattern = '', $limit = 0)
|
||||
{
|
||||
if (is_string($page)) {
|
||||
$page = getSimpleHTMLDOM($page);
|
||||
$page = getSimpleHTMLDOM($page, $this->getHeaders());
|
||||
}
|
||||
|
||||
$entryElements = $page->find($entry_selector);
|
||||
@@ -355,11 +355,11 @@ class CssSelectorComplexBridge extends BridgeAbstract
|
||||
*/
|
||||
protected function fetchArticleElementFromPage($entry_url, $content_selector)
|
||||
{
|
||||
$entry_html = getSimpleHTMLDOMCached($entry_url);
|
||||
$entry_html = getSimpleHTMLDOMCached($entry_url, 86400, $this->getHeaders());
|
||||
$article_content = $entry_html->find($content_selector, 0);
|
||||
|
||||
if (is_null($article_content)) {
|
||||
returnClientError('Could not article content at URL: ' . $entry_url);
|
||||
returnClientError('Could not get article content at URL: ' . $entry_url);
|
||||
}
|
||||
|
||||
$article_content = defaultLinkTo($article_content, $entry_url);
|
||||
@@ -415,10 +415,14 @@ class CssSelectorComplexBridge extends BridgeAbstract
|
||||
) {
|
||||
$article_content = convertLazyLoading($entry_html);
|
||||
|
||||
$article_title = '';
|
||||
if (is_null($title_selector)) {
|
||||
$article_title = $title_default;
|
||||
} else {
|
||||
$article_title = trim($entry_html->find($title_selector, 0)->innertext);
|
||||
$titleElement = $entry_html->find($title_selector, 0);
|
||||
if ($titleElement) {
|
||||
$article_title = trim($titleElement->innertext);
|
||||
}
|
||||
}
|
||||
|
||||
$author = null;
|
||||
|
119
bridges/CssSelectorFeedExpanderBridge.php
Normal file
119
bridges/CssSelectorFeedExpanderBridge.php
Normal file
@@ -0,0 +1,119 @@
|
||||
<?php
|
||||
|
||||
class CssSelectorFeedExpanderBridge extends CssSelectorBridge
|
||||
{
|
||||
const MAINTAINER = 'ORelio';
|
||||
const NAME = 'CSS Selector Feed Expander';
|
||||
const URI = 'https://github.com/RSS-Bridge/rss-bridge/';
|
||||
const DESCRIPTION = 'Expand any site RSS feed using CSS selectors (Advanced Users)';
|
||||
const PARAMETERS = [
|
||||
[
|
||||
'feed' => [
|
||||
'name' => 'Feed: URL of truncated RSS feed',
|
||||
'exampleValue' => 'https://example.com/feed.xml',
|
||||
'required' => true
|
||||
],
|
||||
'content_selector' => [
|
||||
'name' => 'Selector for each article content',
|
||||
'title' => <<<EOT
|
||||
This bridge works using CSS selectors, e.g. "div.article" will match <div class="article">.
|
||||
Everything inside that element becomes feed item content.
|
||||
EOT,
|
||||
'exampleValue' => 'article.content',
|
||||
'required' => true
|
||||
],
|
||||
'content_cleanup' => [
|
||||
'name' => '[Optional] Content cleanup: List of items to remove',
|
||||
'title' => 'Selector for unnecessary elements to remove inside article contents.',
|
||||
'exampleValue' => 'div.ads, div.comments',
|
||||
],
|
||||
'dont_expand_metadata' => [
|
||||
'name' => '[Optional] Don\'t expand metadata',
|
||||
'title' => "This bridge will attempt to fill missing fields using metadata from the webpage.\nCheck to disable.",
|
||||
'type' => 'checkbox',
|
||||
],
|
||||
'discard_thumbnail' => [
|
||||
'name' => '[Optional] Discard thumbnail set by site author',
|
||||
'title' => 'Some sites set their logo as thumbnail for every article. Use this option to discard it.',
|
||||
'type' => 'checkbox',
|
||||
],
|
||||
'thumbnail_as_header' => [
|
||||
'name' => '[Optional] Insert thumbnail as article header',
|
||||
'title' => 'Insert article main image on top of article contents.',
|
||||
'type' => 'checkbox',
|
||||
],
|
||||
'limit' => self::LIMIT
|
||||
]
|
||||
];
|
||||
|
||||
/**
 * Read the source feed and expand each of its items with the element
 * matched by the content selector on the item's own page.
 *
 * Also sets the homepage URL and feed name from the source feed, falling
 * back to the root page / domain of the feed URL when the feed omits them.
 */
public function collectData()
{
    $feedUrl = $this->getInput('feed');
    $contentSelector = $this->getInput('content_selector');
    $contentCleanup = $this->getInput('content_cleanup');
    $keepFeedMetadataOnly = $this->getInput('dont_expand_metadata');
    $dropThumbnail = $this->getInput('discard_thumbnail');
    $thumbnailOnTop = $this->getInput('thumbnail_as_header');
    $maxItems = $this->getInput('limit');

    $parser = new FeedParser();
    $feed = $parser->parseFeed(getContents($feedUrl));
    $entries = $feed['items'];

    // Map Homepage URL (Default: Root page)
    $this->homepageUrl = $feed['uri'] ?? urljoin($feedUrl, '/');

    // Map Feed Name (Default: Domain name)
    $this->feedName = $feed['title'] ?? explode('/', urljoin($feedUrl, '/'))[2];

    // Apply item limit (Default: Global limit)
    if ($maxItems > 0) {
        $entries = array_slice($entries, 0, $maxItems);
    }

    // Expand feed items (CssSelectorBridge)
    foreach ($entries as $feedItem) {
        $expanded = $this->expandEntryWithSelector(
            $feedItem['uri'],
            $contentSelector,
            $contentCleanup
        );

        if ($keepFeedMetadataOnly) {
            // Keep the feed item as is, only swap in the expanded content
            $merged = $feedItem;
            $merged['content'] = $expanded['content'];
        } else {
            // Start from the expanded item; non-empty feed metadata wins, content never does
            $merged = $expanded;
            foreach ($feedItem as $field => $val) {
                if ($field === 'content' || empty($val)) {
                    continue;
                }
                $merged[$field] = $val;
            }
        }

        if ($dropThumbnail && isset($merged['enclosures'])) {
            unset($merged['enclosures']);
        }

        if ($thumbnailOnTop && isset($merged['enclosures'][0])) {
            $header = '<p><img src="' . $merged['enclosures'][0] . '" /></p>';
            $merged['content'] = $header . $merged['content'];
        }

        $this->items[] = $merged;
    }
}
|
||||
}
|
@@ -47,8 +47,8 @@ class CubariBridge extends BridgeAbstract
|
||||
*/
|
||||
public function collectData()
|
||||
{
|
||||
$jsonSite = getContents($this->getInput('gist'));
|
||||
$jsonFile = json_decode($jsonSite, true);
|
||||
$json = getContents($this->getInput('gist'));
|
||||
$jsonFile = json_decode($json, true);
|
||||
|
||||
$this->mangaTitle = $jsonFile['title'];
|
||||
|
||||
@@ -66,12 +66,14 @@ class CubariBridge extends BridgeAbstract
|
||||
{
|
||||
$url = $this->getInput('gist');
|
||||
|
||||
preg_match('/\/([a-z]*)\.githubusercontent.com(.*)/', $url, $matches);
|
||||
|
||||
// raw or gist is first match.
|
||||
$unencoded = $matches[1] . $matches[2];
|
||||
|
||||
return base64_encode($unencoded);
|
||||
if (preg_match('/\/([a-z]*)\.githubusercontent.com(.*)/', $url, $matches)) {
|
||||
// raw or gist is first match.
|
||||
$unencoded = $matches[1] . $matches[2];
|
||||
return base64_encode($unencoded);
|
||||
} else {
|
||||
// todo: fix this
|
||||
return '';
|
||||
}
|
||||
}
|
||||
|
||||
private function getSanitizedHash($string)
|
||||
|
62
bridges/DagensNyheterDirektBridge.php
Normal file
62
bridges/DagensNyheterDirektBridge.php
Normal file
@@ -0,0 +1,62 @@
|
||||
<?PHP
|
||||
|
||||
class DagensNyheterDirektBridge extends BridgeAbstract
|
||||
{
|
||||
const NAME = 'Dagens Nyheter Direkt';
|
||||
const URI = 'https://www.dn.se/direkt/';
|
||||
const BASEURL = 'https://www.dn.se';
|
||||
const DESCRIPTION = 'Latest news summarised by Dagens Nyheter';
|
||||
const MAINTAINER = 'ajain-93';
|
||||
const LIMIT = 20;
|
||||
|
||||
/**
 * Return the icon URL used for this bridge.
 *
 * @return string
 */
public function getIcon()
{
    $iconUrl = 'https://cdn.dn-static.se/images/favicon__c2dd3284b46ffdf4d520536e526065fa8.svg';

    return $iconUrl;
}
|
||||
|
||||
/**
 * Collect posts from the dn.se "direkt" ajax endpoint.
 *
 * Every <article> element on the page becomes one feed item, up to
 * self::LIMIT items. Articles that lack the link button, the heading or
 * the content container are skipped.
 */
public function collectData()
{
    $NEWSURL = self::BASEURL . '/ajax/direkt/';

    $html = getSimpleHTMLDOM($NEWSURL) or
        returnServerError('Could not request: ' . $NEWSURL);

    foreach ($html->find('article') as $element) {
        // Check the cap before adding: testing `> LIMIT` after the append
        // let LIMIT + 1 items through.
        if (count($this->items) >= self::LIMIT) {
            break;
        }

        $button = $element->find('button', 0);
        $heading = $element->find('h2', 0);
        $article_content = $element->find('div.direkt-post__content', 0);
        if (!$button || !$heading || !$article_content) {
            // find(..., 0) yields null when nothing matches; skip the article
            // instead of failing on a null element.
            continue;
        }

        $link = $button->getAttribute('data-link');
        $datetime = $element->getAttribute('data-publication-time');
        $url = self::BASEURL . $link;
        $title = $heading->plaintext;

        // The byline is treated as optional.
        $byline = $element->find('div.ds-byline__titles', 0);
        $author = $byline ? $byline->plaintext : '';

        $article_html = '';

        $figure = $element->find('figure', 0);
        if ($figure) {
            $article_html = $figure->find('img', 0) . '<p><i>' . $figure->find('figcaption', 0) . '</i></p>';
        }

        foreach ($article_content->find('p') as $p) {
            $article_html .= $p;
        }

        $this->items[] = [
            'uri' => $url,
            'title' => $title,
            'author' => trim($author),
            'timestamp' => $datetime,
            'content' => trim($article_html),
        ];
    }
}
|
||||
}
|
@@ -56,9 +56,8 @@ class DarkReadingBridge extends FeedExpander
|
||||
$this->collectExpandableDatas($feed_url, $limit);
|
||||
}
|
||||
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($newsItem);
|
||||
$article = getSimpleHTMLDOMCached($item['uri']);
|
||||
$item['content'] = $this->extractArticleContent($article);
|
||||
$item['enclosures'] = []; //remove author profile picture
|
||||
|
@@ -43,9 +43,8 @@ class DauphineLibereBridge extends FeedExpander
|
||||
$this->collectExpandableDatas($url, 10);
|
||||
}
|
||||
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($newsItem);
|
||||
$item['content'] = $this->extractContent($item['uri']);
|
||||
return $item;
|
||||
}
|
||||
|
@@ -1909,6 +1909,8 @@ class DealabsBridge extends PepperBridgeAbstract
|
||||
'context-group' => 'Deals par groupe',
|
||||
'context-talk' => 'Surveillance Discussion',
|
||||
'uri-group' => 'groupe/',
|
||||
'uri-deal' => 'bons-plans/',
|
||||
'uri-merchant' => 'search/bons-plans?merchant-id=',
|
||||
'request-error' => 'Impossible de joindre Dealabs',
|
||||
'thread-error' => 'Impossible de déterminer l\'ID de la discussion. Vérifiez l\'URL que vous avez entré',
|
||||
'no-results' => 'Il n'y a rien à afficher pour le moment :(',
|
||||
|
@@ -4,7 +4,7 @@ class DemoBridge extends BridgeAbstract
|
||||
{
|
||||
const MAINTAINER = 'teromene';
|
||||
const NAME = 'DemoBridge';
|
||||
const URI = 'http://github.com/rss-bridge/rss-bridge';
|
||||
const URI = 'https://github.com/rss-bridge/rss-bridge';
|
||||
const DESCRIPTION = 'Bridge used for demos';
|
||||
const CACHE_TIMEOUT = 15;
|
||||
|
||||
|
62
bridges/DemosBerlinBridge.php
Normal file
62
bridges/DemosBerlinBridge.php
Normal file
@@ -0,0 +1,62 @@
|
||||
<?php
|
||||
|
||||
class DemosBerlinBridge extends BridgeAbstract
|
||||
{
|
||||
const NAME = 'Demos Berlin';
|
||||
const URI = 'https://www.berlin.de/polizei/service/versammlungsbehoerde/versammlungen-aufzuege/';
|
||||
const CACHE_TIMEOUT = 3 * 60 * 60;
|
||||
const DESCRIPTION = 'Angezeigte Versammlungen und Aufzüge in Berlin';
|
||||
const MAINTAINER = 'knrdl';
|
||||
const PARAMETERS = [[
|
||||
'days' => [
|
||||
'name' => 'Tage',
|
||||
'type' => 'number',
|
||||
'title' => 'Einträge für die nächsten Tage zurückgeben',
|
||||
'required' => true,
|
||||
'defaultValue' => 7,
|
||||
]
|
||||
]];
|
||||
|
||||
/**
 * Return the icon URL used for this bridge.
 *
 * @return string
 */
public function getIcon()
{
    $iconUrl = 'https://www.berlin.de/i9f/r1/images/favicon/favicon.ico';

    return $iconUrl;
}
|
||||
|
||||
/**
 * Collect registered assemblies from the berlin.de JSON index.
 *
 * Entries dated no later than "now + days" (the `days` input) become feed
 * items. Entries whose date cannot be parsed are skipped.
 */
public function collectData()
{
    $json = getContents('https://www.berlin.de/polizei/service/versammlungsbehoerde/versammlungen-aufzuege/index.php/index/all.json');
    $jsonFile = json_decode($json, true);

    $daysInterval = DateInterval::createFromDateString($this->getInput('days') . ' day');
    $maxTargetDate = date_add(new DateTime('now'), $daysInterval);

    // The entry fields come from a remote service and end up inside HTML, so
    // they are escaped first. double_encode is off so entities that are
    // already present in the data are not encoded a second time.
    $esc = static function ($value) {
        return htmlspecialchars((string) $value, ENT_QUOTES, 'UTF-8', false);
    };

    // A response without an "index" list yields no items.
    foreach ($jsonFile['index'] ?? [] as $entry) {
        $entryDay = implode('-', array_reverse(explode('.', $entry['datum']))); // dd.mm.yyyy to yyyy-mm-dd
        $entryTime = strtotime($entryDay);
        if ($entryTime === false) {
            // strtotime() returns false for a date it cannot read.
            continue;
        }

        $ts = (new DateTime())->setTimestamp($entryTime);
        if ($ts > $maxTargetDate) {
            continue;
        }

        $item = [];
        $item['uri'] = 'https://www.berlin.de/polizei/service/versammlungsbehoerde/versammlungen-aufzuege/index.php/detail/' . $entry['id'];
        $item['timestamp'] = $entryDay . ' ' . $entry['von'];
        $item['title'] = $entry['thema'];

        $location = $entry['strasse_nr'] . ' ' . $entry['plz'];
        $locationQuery = http_build_query(['query' => $location]);

        $thema = $esc($entry['thema']);
        $datetimeAttr = $esc($item['timestamp']);
        $datum = $esc($entry['datum']);
        $von = $esc($entry['von']);
        $bis = $esc($entry['bis']);
        $locationText = $esc($location);
        $strecke = $esc($entry['aufzugsstrecke']);

        $item['content'] = <<<HTML
            <h1>{$thema}</h1>
            <p>📅 <time datetime="{$datetimeAttr}">{$datum} {$von} - {$bis}</time></p>
            <a href="https://www.openstreetmap.org/search?$locationQuery">
            📍 {$locationText}
            </a>
            <p>{$strecke}</p>
            HTML;

        // The uid is built from the raw (unescaped) values so it stays stable.
        $item['uid'] = $this->getSanitizedHash($entry['datum'] . '-' . $entry['von'] . '-' . $entry['bis'] . '-' . $entry['thema']);

        $this->items[] = $item;
    }
}
|
||||
|
||||
/**
 * Hash a string after lowercasing it and dropping every character that
 * is not an ASCII letter or digit.
 *
 * @param string $string Text to hash
 * @return string SHA-1 hex digest of the normalized text
 */
private function getSanitizedHash($string)
{
    $lowered = strtolower($string);
    $alphanumeric = preg_replace('/[^a-zA-Z0-9]/', '', $lowered);

    return hash('sha1', $alphanumeric);
}
|
||||
}
|
@@ -71,10 +71,8 @@ class DeutscheWelleBridge extends FeedExpander
|
||||
$this->collectExpandableDatas($this->getInput('feed'));
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$parsedUrl = parse_url($item['uri']);
|
||||
unset($parsedUrl['query']);
|
||||
$url = $this->unparseUrl($parsedUrl);
|
||||
|
28
bridges/DeutscherAeroClubBridge.php
Normal file
28
bridges/DeutscherAeroClubBridge.php
Normal file
@@ -0,0 +1,28 @@
|
||||
<?php
|
||||
|
||||
class DeutscherAeroClubBridge extends XPathAbstract
|
||||
{
|
||||
const NAME = 'Deutscher Aero Club';
|
||||
const URI = 'https://www.daec.de/news/';
|
||||
const DESCRIPTION = 'News aus Luftsport und Dachverband';
|
||||
const MAINTAINER = 'hleskien';
|
||||
|
||||
const FEED_SOURCE_URL = 'https://www.daec.de/news/';
|
||||
const XPATH_EXPRESSION_FEED_ICON = './/link[@rel="icon"][1]/@href';
|
||||
const XPATH_EXPRESSION_ITEM = '//div[contains(@class, "news-list-view")]/div[contains(@class, "article")]';
|
||||
const XPATH_EXPRESSION_ITEM_TITLE = './/span[@itemprop="headline"]';
|
||||
const XPATH_EXPRESSION_ITEM_CONTENT = './/div[@itemprop="description"]/p';
|
||||
const XPATH_EXPRESSION_ITEM_URI = './/div[@class="news-header"]//a/@href';
|
||||
//const XPATH_EXPRESSION_ITEM_AUTHOR = './/';
|
||||
const XPATH_EXPRESSION_ITEM_TIMESTAMP = './/time/@datetime';
|
||||
const XPATH_EXPRESSION_ITEM_ENCLOSURES = './/img/@src';
|
||||
//const XPATH_EXPRESSION_ITEM_CATEGORIES = './/';
|
||||
|
||||
/**
 * Convert a "Y-m-d" date string into a Unix timestamp at 00:00:00.
 *
 * @param string $value Date text, expected in "Y-m-d" form
 * @return int|null Timestamp, or null when the text cannot be read as a date
 */
protected function formatItemTimestamp($value)
{
    $value = trim((string) $value);

    $dti = DateTimeImmutable::createFromFormat('Y-m-d', $value);
    if ($dti === false) {
        // createFromFormat() returns false on a mismatch (e.g. trailing time
        // part); calling setTime() on that would be a fatal error. Try the
        // generic parser before giving up.
        $fallback = strtotime($value);
        return $fallback === false ? null : $fallback;
    }

    return $dti->setTime(0, 0, 0)->getTimestamp();
}
|
||||
}
|
||||
|
@@ -163,19 +163,6 @@ class DeveloppezDotComBridge extends FeedExpander
|
||||
]
|
||||
];
|
||||
|
||||
/**
|
||||
* Return the RSS url for selected domain
|
||||
*/
|
||||
private function getRssUrl()
|
||||
{
|
||||
$domain = $this->getInput('domain');
|
||||
if (!empty($domain)) {
|
||||
return 'https://' . $domain . self::DOMAIN . self::RSS_URL;
|
||||
}
|
||||
|
||||
return self::URI . self::RSS_URL;
|
||||
}
|
||||
|
||||
/**
|
||||
* Grabs the RSS item from Developpez.com
|
||||
*/
|
||||
@@ -189,15 +176,12 @@ class DeveloppezDotComBridge extends FeedExpander
|
||||
* Parse the content of every RSS item. And will try to get the full article
|
||||
* pointed by the item URL intead of the default abstract.
|
||||
*/
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
if (count($this->items) >= $this->getInput('limit')) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// This function parse each entry in the RSS with the default parse
|
||||
$item = parent::parseItem($newsItem);
|
||||
|
||||
// There is a bug in Developpez RSS, coma are writtent as '~?' in the
|
||||
// title, so I have to fix it manually
|
||||
$item['title'] = $this->fixComaInTitle($item['title']);
|
||||
@@ -229,6 +213,19 @@ class DeveloppezDotComBridge extends FeedExpander
|
||||
return $item;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the RSS url for selected domain
|
||||
*/
|
||||
private function getRssUrl()
|
||||
{
|
||||
$domain = $this->getInput('domain');
|
||||
if (!empty($domain)) {
|
||||
return 'https://' . $domain . self::DOMAIN . self::RSS_URL;
|
||||
}
|
||||
|
||||
return self::URI . self::RSS_URL;
|
||||
}
|
||||
|
||||
/**
|
||||
* Replace '~?' by a proper coma ','
|
||||
*/
|
||||
@@ -334,6 +331,9 @@ class DeveloppezDotComBridge extends FeedExpander
|
||||
*/
|
||||
private function isHtmlTagNotTxt($txt)
|
||||
{
|
||||
if ($txt === '') {
|
||||
return false;
|
||||
}
|
||||
$html = str_get_html($txt);
|
||||
return $html && $html->root && count($html->root->children) > 0;
|
||||
}
|
||||
|
@@ -31,7 +31,11 @@ class EBayBridge extends BridgeAbstract
|
||||
|
||||
public function getName()
|
||||
{
|
||||
$urlQueries = explode('&', parse_url($this->getInput('url'), PHP_URL_QUERY));
|
||||
$url = $this->getInput('url');
|
||||
if (!$url) {
|
||||
return parent::getName();
|
||||
}
|
||||
$urlQueries = explode('&', parse_url($url, PHP_URL_QUERY));
|
||||
|
||||
$searchQuery = array_reduce($urlQueries, function ($q, $p) {
|
||||
if (preg_match('/^_nkw=(.+)$/i', $p, $matches)) {
|
||||
|
85
bridges/EDDHPiRepsBridge.php
Normal file
85
bridges/EDDHPiRepsBridge.php
Normal file
@@ -0,0 +1,85 @@
|
||||
<?php
|
||||
|
||||
class EDDHPiRepsBridge extends BridgeAbstract
|
||||
{
|
||||
const NAME = 'EDDH.de PIREPs';
|
||||
const URI = 'https://eddh.de/info/pireps_08days.php';
|
||||
const DESCRIPTION = 'Erfahrungen und Tipps von Piloten für Piloten: Die Einträge der letzten 8 Tage';
|
||||
const MAINTAINER = 'hleskien';
|
||||
//const PARAMETERS = [];
|
||||
//const CACHE_TIMEOUT = 3600;
|
||||
|
||||
/**
 * Load the PIREP page and turn every matched table cell into a feed item.
 */
public function collectData()
{
    $page = getSimpleHTMLDOM(self::URI);
    $cells = $page->find('table table table td');

    foreach ($cells as $cell) {
        $texts = $this->extractTexts($cell->find('text, br'));
        $rawDate = $cell->find('.su_dat', 0)->innertext();
        $href = $cell->find('.pir_hd a', 0)->href;

        $entry = [];
        $entry['timestamp'] = $this->formatItemTimestamp($rawDate);
        $entry['title'] = $this->formatItemTitle($texts);
        $entry['uri'] = $this->formatItemUri($href);
        $entry['author'] = $this->formatItemAuthor($texts);
        $entry['content'] = $this->formatItemContent($texts);

        $this->items[] = $entry;
    }
}
|
||||
|
||||
/**
 * Return the icon URL used for this bridge.
 *
 * @return string
 */
public function getIcon()
{
    $iconUrl = 'https://eddh.de/favicon.ico';

    return $iconUrl;
}
|
||||
|
||||
/**
 * Flatten a list of text and <br> nodes into a list of strings.
 *
 * <br> nodes become "\n"; non-empty text nodes are converted from
 * Windows-1252 to UTF-8 and appended. Other nodes and blank text nodes are
 * dropped.
 *
 * @param iterable $nodes Nodes as returned by find('text, br')
 * @return string[] Sequentially indexed text fragments
 */
private function extractTexts($nodes)
{
    $texts = [];

    foreach ($nodes as $node) {
        if ($node->tag == 'br') {
            $texts[] = "\n";
            continue;
        }
        if ($node->tag != 'text') {
            continue;
        }

        $text = trim($node->outertext());
        if ($text == '') {
            continue;
        }

        // iconv() returns false when conversion fails; keep the raw text then.
        $converted = iconv('Windows-1252', 'UTF-8', $text);
        if ($converted !== false) {
            $text = $converted;
        }

        // Strip non-breaking spaces only (entity and character form).
        // Removing every ordinary space glued the words of names and
        // remarks together.
        $texts[] = str_replace(['&nbsp;', "\u{00A0}"], '', $text);
    }

    return $texts;
}
|
||||
|
||||
/**
 * Return the fragment that follows the "Name:" label.
 *
 * @param string[] $texts Text fragments of one entry
 * @return string The author, or '' when the label or its value is missing
 */
protected function formatItemAuthor($texts)
{
    $pos = array_search('Name:', $texts, true);
    if ($pos === false) {
        // array_search() returns false when the label is absent; used as an
        // index that would silently read element 1 instead.
        return '';
    }

    return $texts[$pos + 1] ?? '';
}
|
||||
|
||||
/**
 * Join the fragments between the "Bemerkungen:" and "Bewertung:" labels.
 *
 * @param string[] $texts Sequentially indexed text fragments of one entry
 * @return string Trimmed content, or '' when either label is missing
 */
protected function formatItemContent($texts)
{
    $start = array_search('Bemerkungen:', $texts, true);
    $end = array_search('Bewertung:', $texts, true);

    if ($start === false || $end === false) {
        // A missing start label used to count as index 0 and pulled in
        // unrelated fragments from the top of the entry.
        return '';
    }

    // Fragments strictly between the two labels; empty when they are adjacent
    // or out of order.
    $between = array_slice($texts, $start + 1, max(0, $end - $start - 1));

    return trim(implode('', $between));
}
|
||||
|
||||
/**
 * Build the item title from fragments 1, 2, 3 and 5, separated by single
 * spaces; leading "(" characters are removed from fragment 5.
 *
 * @param string[] $texts Text fragments of one entry
 * @return string
 */
protected function formatItemTitle($texts)
{
    $lastPart = ltrim($texts[5], '(');

    return $texts[1] . ' ' . $texts[2] . ' ' . $texts[3] . ' ' . $lastPart;
}
|
||||
|
||||
/**
 * Parse the entry date after removing the "Eintrag vom" prefix.
 *
 * @param string $value Raw date text
 * @return int|false Result of strtotime() on the cleaned text
 */
protected function formatItemTimestamp($value)
{
    $dateText = trim(str_replace('Eintrag vom', '', $value));

    return strtotime($dateText);
}
|
||||
|
||||
/**
 * Prefix a link with the eddh.de info directory.
 *
 * @param string $value Link as found on the page
 * @return string
 */
protected function formatItemUri($value)
{
    $base = 'https://eddh.de/info/';

    return $base . $value;
}
|
||||
}
|
42
bridges/EDDHPresseschauBridge.php
Normal file
42
bridges/EDDHPresseschauBridge.php
Normal file
@@ -0,0 +1,42 @@
|
||||
<?php
|
||||
|
||||
class EDDHPresseschauBridge extends XPathAbstract
|
||||
{
|
||||
const NAME = 'EDDH.de Presseschau';
|
||||
const URI = 'https://eddh.de/presse/presseschau.php';
|
||||
const DESCRIPTION = 'Luftfahrt-Presseschau: Presse-Artikel aus der Luftfahrt';
|
||||
const MAINTAINER = 'hleskien';
|
||||
|
||||
const FEED_SOURCE_URL = 'https://eddh.de/presse/presseschau.php';
|
||||
//const XPATH_EXPRESSION_FEED_ICON = './/link[@rel="icon"]/@href';
|
||||
const XPATH_EXPRESSION_ITEM = '//table//table[.//p[@class="pressnews"]]//td';
|
||||
const XPATH_EXPRESSION_ITEM_TITLE = './h4';
|
||||
const XPATH_EXPRESSION_ITEM_CONTENT = './p[@class="pressnews"]';
|
||||
const XPATH_EXPRESSION_ITEM_URI = './p[@class="pressnews"]/a/@href';
|
||||
const XPATH_EXPRESSION_ITEM_AUTHOR = './p[@class="quelle"]';
|
||||
const XPATH_EXPRESSION_ITEM_TIMESTAMP = './p[@class="quelle"]';
|
||||
//const XPATH_EXPRESSION_ITEM_ENCLOSURES = './';
|
||||
//const XPATH_EXPRESSION_ITEM_CATEGORIES = './/';
|
||||
|
||||
/**
 * Return the icon URL used for this bridge.
 *
 * @return string
 */
public function getIcon()
{
    $iconUrl = 'https://eddh.de/favicon.ico';

    return $iconUrl;
}
|
||||
|
||||
/**
 * Return the text before the first "(" of the source line, trimmed.
 *
 * @param string $value Source line
 * @return string
 */
protected function formatItemAuthor($value)
{
    // Only the part ahead of the first "(" is needed.
    [$beforeParenthesis] = explode('(', $value, 2);

    return trim($beforeParenthesis);
}
|
||||
|
||||
/**
 * Extract the "d.m.Y" date that follows the first "(" of the source line
 * and convert it into a Unix timestamp at 00:00:00.
 *
 * @param string $value Source line
 * @return int|null Timestamp, or null when no readable date is present
 */
protected function formatItemTimestamp($value)
{
    $parts = explode('(', (string) $value);
    if (!isset($parts[1])) {
        // No "(" at all: there is nothing to parse.
        return null;
    }

    $ws = ["\n", "\t", ' ', ')'];
    $dateText = str_replace($ws, '', $parts[1]);

    $dti = DateTimeImmutable::createFromFormat('d.m.Y', $dateText);
    if ($dti === false) {
        // createFromFormat() returns false on a mismatch; calling setTime()
        // on that would be a fatal error.
        return null;
    }

    return $dti->setTime(0, 0, 0)->getTimestamp();
}
|
||||
}
|
@@ -96,7 +96,7 @@ class EZTVBridge extends BridgeAbstract
|
||||
protected function getItemFromTorrent($torrent)
|
||||
{
|
||||
$item = [];
|
||||
$item['uri'] = $torrent->episode_url;
|
||||
$item['uri'] = $torrent->episode_url ?? $torrent->torrent_url;
|
||||
$item['author'] = $torrent->imdb_id;
|
||||
$item['timestamp'] = $torrent->date_released_unix;
|
||||
$item['title'] = $torrent->title;
|
||||
|
@@ -93,21 +93,21 @@ class EconomistBridge extends FeedExpander
|
||||
$limit = 30;
|
||||
}
|
||||
|
||||
$this->collectExpandableDatas('https://www.economist.com/' . $category . '/rss.xml', $limit);
|
||||
$url = 'https://www.economist.com/' . $category . '/rss.xml';
|
||||
$this->collectExpandableDatas($url, $limit);
|
||||
}
|
||||
|
||||
protected function parseItem($feedItem)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($feedItem);
|
||||
$html = getSimpleHTMLDOM($item['uri']);
|
||||
$dom = getSimpleHTMLDOM($item['uri']);
|
||||
|
||||
$article = $html->find('#new-article-template', 0);
|
||||
$article = $dom->find('#new-article-template', 0);
|
||||
if ($article == null) {
|
||||
$article = $html->find('main', 0);
|
||||
$article = $dom->find('main', 0);
|
||||
}
|
||||
if ($article) {
|
||||
$elem = $article->find('div', 0);
|
||||
list($content, $audio_url) = $this->processContent($html, $elem);
|
||||
list($content, $audio_url) = $this->processContent($dom, $elem);
|
||||
$item['content'] = $content;
|
||||
if ($audio_url != null) {
|
||||
$item['enclosures'] = [$audio_url];
|
||||
|
106
bridges/EdfPricesBridge.php
Normal file
106
bridges/EdfPricesBridge.php
Normal file
@@ -0,0 +1,106 @@
|
||||
<?php
|
||||
|
||||
class EdfPricesBridge extends BridgeAbstract
|
||||
{
|
||||
const NAME = 'EDF tarifs';
|
||||
// pull info from this site for now because EDF do not provide correct opendata
|
||||
const URI = 'https://www.jechange.fr';
|
||||
const DESCRIPTION = 'Fetches the latest infos of EDF prices';
|
||||
const MAINTAINER = 'floviolleau';
|
||||
const PARAMETERS = [
|
||||
[
|
||||
'contract' => [
|
||||
'name' => 'Choisir un contrat',
|
||||
'type' => 'list',
|
||||
// we can add later HCHP, EJP, base
|
||||
'values' => ['Tempo' => '/energie/edf/tarifs/tempo'],
|
||||
]
|
||||
]
|
||||
];
|
||||
const CACHE_TIMEOUT = 7200; // 2h
|
||||
|
||||
/**
 * Build the feed items for the Tempo contract page.
 *
 * Three groups are appended to $this->items, in this order:
 *  - the day cards (only when exactly two are found),
 *  - two items per colour line (only when exactly three lines are found),
 *  - one item per power line (only when exactly four lines are found).
 * A group whose anchor element is missing from the page is left out.
 *
 * @param simple_html_dom $html
 * @param string $contractUri
 * @return void
 */
private function tempo(simple_html_dom $html, string $contractUri): void
{
    $uri = self::URI . $contractUri;

    // current color and next
    $cardsHolderDom = $this->skipSiblings($html->find('#calendrier', 0), 1);
    $daysDom = $cardsHolderDom ? $cardsHolderDom->find('.card--ejp') : [];
    if ($daysDom && count($daysDom) === 2) {
        $year = (new \DateTime('now'))->format('Y');
        foreach ($daysDom as $dayDom) {
            $titleDom = $dayDom->find('.card__title', 0);
            $colorDom = $dayDom->find('.card-ejp__icon span', 0);
            if (!$titleDom || !$colorDom) {
                continue;
            }

            $day = trim($titleDom->innertext) . '/' . $year;
            $this->items[] = $this->buildTempoItem($day . ' - ' . $colorDom->innertext, $uri);
        }
    }

    // colors: the list is the third sibling after the anchor element
    $ulDom = $this->skipSiblings($html->find('#tarif-de-l-offre-edf-tempo-current-date-html-year', 0), 3);
    if (!$ulDom) {
        // The power list is located relative to this one, so nothing more can be read.
        return;
    }

    $elementsDom = $ulDom->find('li');
    if ($elementsDom && count($elementsDom) === 3) {
        foreach ($elementsDom as $elementDom) {
            $match = [];
            $found = preg_match('/Jour (.*) : Heures (.*) : (.*) € \/ Heures (.*) : (.*) €/um', $elementDom->innertext, $match);

            if ($found === 1 && count($match) === 6) {
                // Groups 2/3 and 4/5 hold the two (period, price) pairs of the line.
                for ($i = 0; $i < 2; $i++) {
                    $text = 'Jour ' . $match[1] . ' - Heures ' . $match[2 + 2 * $i] . ' : ' . $match[3 + 2 * $i] . '€';
                    $this->items[] = $this->buildTempoItem($text, $uri);
                }
            }
        }
    }

    // powers: the list is the second sibling after the colour list
    $ulPowerContract = $this->skipSiblings($ulDom, 2);
    if (!$ulPowerContract) {
        return;
    }

    $elementsPowerContractDom = $ulPowerContract->find('li');
    if ($elementsPowerContractDom && count($elementsPowerContractDom) === 4) {
        foreach ($elementsPowerContractDom as $elementPowerContractDom) {
            $match = [];
            $found = preg_match('/(.*) kVA : (.*) €/um', $elementPowerContractDom->innertext, $match);

            if ($found === 1 && count($match) === 3) {
                $text = $match[1] . ' kVA : ' . $match[2] . '€';
                $this->items[] = $this->buildTempoItem($text, $uri);
            }
        }
    }
}

/**
 * Build one feed item whose title and content are the same text.
 *
 * @param string $text Title and content of the item
 * @param string $uri Item link
 * @return array
 */
private function buildTempoItem(string $text, string $uri): array
{
    return [
        'uri' => $uri,
        'title' => $text,
        'author' => self::MAINTAINER,
        'content' => $text,
        'uid' => hash('sha256', $text),
    ];
}

/**
 * Follow nextSibling() $steps times, stopping early when an element is missing.
 *
 * @param object|null $node Start element (may be null)
 * @param int $steps Number of siblings to move forward
 * @return object|null The element reached, or null when the chain ends first
 */
private function skipSiblings($node, int $steps)
{
    for ($i = 0; $i < $steps && $node; $i++) {
        $node = $node->nextSibling();
    }

    return $node ?: null;
}
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$contract = $this->getKey('contract');
|
||||
$contractUri = $this->getInput('contract');
|
||||
$html = getSimpleHTMLDOM(self::URI . $contractUri);
|
||||
|
||||
if ($contract === 'Tempo') {
|
||||
$this->tempo($html, $contractUri);
|
||||
}
|
||||
}
|
||||
}
|
@@ -10,26 +10,26 @@ class EngadgetBridge extends FeedExpander
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$url = 'https://www.engadget.com/rss.xml';
|
||||
$max = 10;
|
||||
$this->collectExpandableDatas(static::URI . 'rss.xml', $max);
|
||||
$this->collectExpandableDatas($url, $max);
|
||||
}
|
||||
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($newsItem);
|
||||
$url = (string) $newsItem->link;
|
||||
if (!$url) {
|
||||
$itemUrl = trim($item['uri']);
|
||||
if (!$itemUrl) {
|
||||
return $item;
|
||||
}
|
||||
// todo: remove querystring tracking
|
||||
$articlePage = getSimpleHTMLDOM($url);
|
||||
$dom = getSimpleHTMLDOM($itemUrl);
|
||||
// figure contains the main article image
|
||||
$article = $articlePage->find('figure', 0);
|
||||
$article = $dom->find('figure', 0);
|
||||
// .article-text has the actual article
|
||||
foreach ($articlePage->find('.article-text') as $element) {
|
||||
foreach ($dom->find('.article-text') as $element) {
|
||||
$article = $article . $element;
|
||||
}
|
||||
$item['content'] = $article;
|
||||
$item['content'] = $article ?? '';
|
||||
return $item;
|
||||
}
|
||||
}
|
||||
|
@@ -98,7 +98,7 @@ class ErowallBridge extends BridgeAbstract
|
||||
$ret .= 'dat/';
|
||||
break;
|
||||
default:
|
||||
$tag = $this->getInput('tag');
|
||||
$tag = $this->getInput('tag') ?? '';
|
||||
$ret .= 'teg/' . str_replace(' ', '+', $tag);
|
||||
}
|
||||
|
||||
|
@@ -1,5 +1,8 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Appears to be protected by cloudflare now
|
||||
*/
|
||||
class EsquerdaNetBridge extends FeedExpander
|
||||
{
|
||||
const MAINTAINER = 'somini';
|
||||
@@ -23,32 +26,14 @@ class EsquerdaNetBridge extends FeedExpander
|
||||
]
|
||||
];
|
||||
|
||||
public function getURI()
|
||||
{
|
||||
$type = $this->getInput('feed');
|
||||
return self::URI . '/rss/' . $type;
|
||||
}
|
||||
|
||||
public function getIcon()
|
||||
{
|
||||
return 'https://www.esquerda.net/sites/default/files/favicon_0.ico';
|
||||
}
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
parent::collectExpandableDatas($this->getURI());
|
||||
}
|
||||
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
# Fix Publish date
|
||||
$badDate = $newsItem->pubDate;
|
||||
preg_match('|(?P<day>\d\d)/(?P<month>\d\d)/(?P<year>\d\d\d\d) - (?P<hour>\d\d):(?P<minute>\d\d)|', $badDate, $d);
|
||||
$newsItem->pubDate = sprintf('%s-%s-%sT%s:%s', $d['year'], $d['month'], $d['day'], $d['hour'], $d['minute']);
|
||||
$item = parent::parseItem($newsItem);
|
||||
# Include all the content
|
||||
$uri = $item['uri'];
|
||||
$html = getSimpleHTMLDOMCached($uri);
|
||||
$html = getSimpleHTMLDOMCached($item['uri']);
|
||||
$content = $html->find('div#content div.content', 0);
|
||||
## Fix author
|
||||
$authorHTML = $html->find('.field-name-field-op-author a', 0);
|
||||
@@ -72,4 +57,15 @@ class EsquerdaNetBridge extends FeedExpander
|
||||
$item['content'] = $content;
|
||||
return $item;
|
||||
}
|
||||
|
||||
public function getURI()
|
||||
{
|
||||
$type = $this->getInput('feed');
|
||||
return self::URI . '/rss/' . $type;
|
||||
}
|
||||
|
||||
public function getIcon()
|
||||
{
|
||||
return 'https://www.esquerda.net/sites/default/files/favicon_0.ico';
|
||||
}
|
||||
}
|
||||
|
@@ -21,34 +21,18 @@ class FDroidBridge extends BridgeAbstract
|
||||
|
||||
public function getIcon()
|
||||
{
|
||||
return self::URI . 'assets/favicon.ico?v=8j6PKzW9Mk';
|
||||
return self::URI . 'assets/favicon.ico';
|
||||
}
|
||||
|
||||
private function getTimestamp($url)
|
||||
{
|
||||
$curlOptions = [
|
||||
CURLOPT_RETURNTRANSFER => true,
|
||||
CURLOPT_HEADER => true,
|
||||
CURLOPT_NOBODY => true,
|
||||
CURLOPT_CONNECTTIMEOUT => 19,
|
||||
CURLOPT_TIMEOUT => 19,
|
||||
CURLOPT_CUSTOMREQUEST => 'HEAD',
|
||||
CURLOPT_NOBODY => true,
|
||||
];
|
||||
$ch = curl_init($url);
|
||||
curl_setopt_array($ch, $curlOptions);
|
||||
$curlHeaders = curl_exec($ch);
|
||||
$curlError = curl_error($ch);
|
||||
curl_close($ch);
|
||||
if (!empty($curlError)) {
|
||||
return false;
|
||||
}
|
||||
$curlHeaders = explode("\n", $curlHeaders);
|
||||
$timestamp = false;
|
||||
foreach ($curlHeaders as $header) {
|
||||
if (strpos($header, 'Last-Modified') !== false) {
|
||||
$timestamp = str_replace('Last-Modified: ', '', $header);
|
||||
$timestamp = strtotime($timestamp);
|
||||
}
|
||||
}
|
||||
$reponse = getContents($url, [], $curlOptions, true);
|
||||
$lastModified = $reponse['headers']['last-modified'][0] ?? null;
|
||||
$timestamp = strtotime($lastModified ?? 'today');
|
||||
return $timestamp;
|
||||
}
|
||||
|
||||
|
@@ -85,7 +85,7 @@ class FallGuysBridge extends BridgeAbstract
|
||||
for ($i = 0; $i < count($mediaOptions); $i++) {
|
||||
if (property_exists($mediaOptions[$i], 'youtubeVideo')) {
|
||||
$videoUrl = 'https://youtu.be/' . $mediaOptions[$i]->youtubeVideo->contentId;
|
||||
$image = $mainContentOptions[$i]->image->src;
|
||||
$image = $mainContentOptions[$i]->image->src ?? '';
|
||||
|
||||
$content .= '<p>';
|
||||
|
||||
|
103
bridges/FarsideNitterBridge.php
Normal file
103
bridges/FarsideNitterBridge.php
Normal file
@@ -0,0 +1,103 @@
|
||||
<?php
|
||||
|
||||
class FarsideNitterBridge extends FeedExpander
{
    const NAME = 'Farside Nitter Bridge';
    const DESCRIPTION = "Returns an user's recent tweets";
    const URI = 'https://farside.link/nitter/';
    const HOST = 'https://twitter.com/';
    // Number of additional attempts made after the first failed fetch.
    const MAX_RETRIES = 3;
    const PARAMETERS = [
        [
            'username' => [
                'name' => 'username',
                'required' => true,
                'exampleValue' => 'NASA'
            ],
            'noreply' => [
                'name' => 'Without replies',
                'type' => 'checkbox',
                'title' => 'Only return initial tweets'
            ],
            'noretweet' => [
                'name' => 'Without retweets',
                'required' => false,
                'type' => 'checkbox',
                'title' => 'Hide retweets'
            ],
            'linkbacktotwitter' => [
                'name' => 'Link back to twitter',
                'required' => false,
                'type' => 'checkbox',
                'title' => 'Rewrite links back to twitter.com'
            ]
        ],
    ];

    /**
     * Derive bridge parameters from a nitter.net or twitter.com profile URL.
     *
     * @param string $url URL to inspect
     * @return array|null parameters with all filters enabled, or null if the URL does not match
     */
    public function detectParameters($url)
    {
        if (preg_match('/^(https?:\/\/)?(www\.)?(nitter\.net|twitter\.com)\/([^\/?\n]+)/', $url, $matches) > 0) {
            return [
                'username' => $matches[4],
                'noreply' => true,
                'noretweet' => true,
                'linkbacktotwitter' => true
            ];
        }
        return null;
    }

    public function collectData()
    {
        $this->getRSS();
    }

    /**
     * Load the user's RSS feed, retrying when the fetch throws.
     *
     * @param int $attempt zero-based number of the current attempt
     * @throws \Exception the last failure, once MAX_RETRIES retries are used up
     */
    private function getRSS($attempt = 0)
    {
        try {
            $this->collectExpandableDatas(self::URI . $this->getInput('username') . '/rss');
        } catch (\Exception $e) {
            if ($attempt >= self::MAX_RETRIES) {
                throw $e;
            }
            // Must be `$attempt + 1`: a post-increment (`$attempt++`) passes the
            // old value, so the counter never advances and the recursion never ends.
            $this->getRSS($attempt + 1);
        }
    }

    /**
     * Filter and rewrite one feed item.
     *
     * @param array $item item as parsed from the feed
     * @return array|null the adjusted item; nothing is returned for replies or
     *                    retweets the user asked to hide
     */
    protected function parseItem(array $item)
    {
        // Titles starting with "R to " are treated as replies, "RT by " as retweets.
        if ($this->getInput('noreply') && substr($item['title'], 0, 5) == 'R to ') {
            return;
        }
        if ($this->getInput('noretweet') && substr($item['title'], 0, 6) == 'RT by ') {
            return;
        }
        $item['title'] = truncate($item['title']);
        // Rebuild the link from the "/status/..." part so it points either at
        // twitter.com or at the farside redirector, as requested.
        if (preg_match('/(\/status\/.+)/', $item['uri'], $matches) > 0) {
            if ($this->getInput('linkbacktotwitter')) {
                $item['uri'] = self::HOST . $this->getInput('username') . $matches[1];
            } else {
                $item['uri'] = self::URI . $this->getInput('username') . $matches[1];
            }
        }
        return $item;
    }

    /**
     * @return string the parent's name, cut before the first " /" when present
     */
    public function getName()
    {
        if (preg_match('/(.+) \//', parent::getName(), $matches) > 0) {
            return $matches[1];
        }
        return parent::getName();
    }

    /**
     * @return string profile URL on twitter.com or on the farside redirector,
     *                depending on the "linkbacktotwitter" option
     */
    public function getURI()
    {
        if ($this->getInput('linkbacktotwitter')) {
            return self::HOST . $this->getInput('username');
        } else {
            return self::URI . $this->getInput('username');
        }
    }
}
|
@@ -43,24 +43,4 @@ class FeedExpanderExampleBridge extends FeedExpander
|
||||
returnClientError('Unknown version ' . $this->getInput('version') . '!');
|
||||
}
|
||||
}
|
||||
|
||||
protected function parseItem($newsItem)
|
||||
{
|
||||
switch ($this->getInput('version')) {
|
||||
case 'rss_0_9_1':
|
||||
return $this->parseRss091Item($newsItem);
|
||||
break;
|
||||
case 'rss_1_0':
|
||||
return $this->parseRss1Item($newsItem);
|
||||
break;
|
||||
case 'rss_2_0':
|
||||
return $this->parseRss2Item($newsItem);
|
||||
break;
|
||||
case 'atom_1_0':
|
||||
return $this->parseATOMItem($newsItem);
|
||||
break;
|
||||
default:
|
||||
returnClientError('Unknown version ' . $this->getInput('version') . '!');
|
||||
}
|
||||
}
|
||||
}
|
||||
|
23
bridges/FeedExpanderTestBridge.php
Normal file
23
bridges/FeedExpanderTestBridge.php
Normal file
@@ -0,0 +1,23 @@
|
||||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
class FeedExpanderTestBridge extends FeedExpander
{
    const MAINTAINER = 'No maintainer';
    const NAME = 'Unnamed bridge';
    const URI = 'https://esdf.com/';
    const DESCRIPTION = 'No description provided';
    const PARAMETERS = [];
    const CACHE_TIMEOUT = 3600;

    /**
     * Expand one sample feed. The table lists one sample per feed format;
     * change the selected key to exercise another format.
     */
    public function collectData()
    {
        $sampleFeeds = [
            'rss 0.91' => 'http://static.userland.com/gems/backend/sampleRss.xml',
            'rss 1.0' => 'http://feeds.nature.com/nature/rss/current?format=xml',
            'rss 2.0' => 'https://dvikan.no/feed.xml',
            'atom' => 'https://nedlasting.geonorge.no/geonorge/Tjenestefeed.xml',
        ];

        $this->collectExpandableDatas($sampleFeeds['atom']);
    }
}
|
43
bridges/FiaBridge.php
Normal file
43
bridges/FiaBridge.php
Normal file
@@ -0,0 +1,43 @@
|
||||
<?php
|
||||
|
||||
class FiaBridge extends BridgeAbstract
{
    const NAME = 'Federation Internationale de l\'Automobile site feed';
    const URI = 'https://fia.com';
    const DESCRIPTION = 'Get the latest F1 documents from the fia site';
    const PARAMETERS = [];
    const CACHE_TIMEOUT = 900;

    /**
     * Collect the most recent documents listed on the FIA Formula One
     * documents page, one feed item per document row.
     *
     * Rows missing their title, link or date element are skipped. An item
     * whose date cannot be parsed is still emitted, without a timestamp.
     */
    public function collectData()
    {
        $url = 'https://www.fia.com/documents/championships/fia-formula-one-world-championship-14/';
        $html = getSimpleHTMLDOM($url);
        $maxItems = 20;

        foreach ($html->find('li.document-row') as $row) {
            // Check the cap before doing any work on the row.
            if (count($this->items) >= $maxItems) {
                break;
            }

            $titleDom = $row->find('div.title', 0);
            $linkDom = $row->find('a', 0);
            $dateDom = $row->find('span.date-display-single', 0);
            if (!$titleDom || !$linkDom || !$dateDom) {
                continue;
            }

            $title = trim($titleDom->plaintext);
            $documentUrl = 'https://www.fia.com' . $linkDom->href;
            $date = $dateDom->plaintext;

            $item = [];
            $item['uri'] = $documentUrl;
            $item['title'] = $title;
            // createFromFormat() returns false on mismatch; calling
            // getTimestamp() on that would be a fatal error.
            $published = DateTime::createFromFormat('d.m.y H:i', trim($date));
            if ($published !== false) {
                $item['timestamp'] = (string) $published->getTimestamp();
            }
            $item['author'] = 'Fia';
            $item['content'] = "Document on date $date: $title <br /><a href='$documentUrl'>$documentUrl</a>";
            $item['categories'] = ['Document'];
            $item['uid'] = $title . $date;

            $this->items[] = $item;
        }
    }
}
|
@@ -184,6 +184,7 @@ class FicbookBridge extends BridgeAbstract
|
||||
];
|
||||
|
||||
$fixed_date = str_replace($ru_month, $en_month, $date);
|
||||
$fixed_date = str_replace(' г.', '', $fixed_date);
|
||||
|
||||
if ($fixed_date === $date) {
|
||||
Debug::log('Unable to fix date: ' . $date);
|
||||
|
@@ -73,10 +73,17 @@ class FilterBridge extends FeedExpander
|
||||
],
|
||||
]];
|
||||
|
||||
protected function parseItem($newItem)
|
||||
public function collectData()
|
||||
{
|
||||
$item = parent::parseItem($newItem);
|
||||
$url = $this->getInput('url');
|
||||
if (!Url::validate($url)) {
|
||||
returnClientError('The url parameter must either refer to http or https protocol.');
|
||||
}
|
||||
$this->collectExpandableDatas($this->getURI());
|
||||
}
|
||||
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
// Generate title from first 50 characters of content?
|
||||
if ($this->getInput('title_from_content') && array_key_exists('content', $item)) {
|
||||
$content = str_get_html($item['content']);
|
||||
@@ -158,13 +165,4 @@ class FilterBridge extends FeedExpander
|
||||
|
||||
return $url;
|
||||
}
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
if ($this->getInput('url') && substr($this->getInput('url'), 0, 4) !== 'http') {
|
||||
// just in case someone finds a way to access local files by playing with the url
|
||||
returnClientError('The url parameter must either refer to http or https protocol.');
|
||||
}
|
||||
$this->collectExpandableDatas($this->getURI());
|
||||
}
|
||||
}
|
||||
|
@@ -19,23 +19,6 @@ class FirefoxAddonsBridge extends BridgeAbstract
|
||||
const CACHE_TIMEOUT = 3600;
|
||||
|
||||
private $feedName = '';
|
||||
private $releaseDateRegex = '/Released ([\w, ]+) - ([\w. ]+)/';
|
||||
private $xpiFileRegex = '/([A-Za-z0-9_.-]+)\.xpi$/';
|
||||
private $outgoingRegex = '/https:\/\/prod.outgoing\.prod\.webservices\.mozgcp\.net\/v1\/(?:[A-z0-9]+)\//';
|
||||
|
||||
private $urlRegex = '/addons\.mozilla\.org\/(?:[\w-]+\/)?firefox\/addon\/([\w-]+)/';
|
||||
|
||||
public function detectParameters($url)
|
||||
{
|
||||
$params = [];
|
||||
|
||||
if (preg_match($this->urlRegex, $url, $matches)) {
|
||||
$params['id'] = $matches[1];
|
||||
return $params;
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
@@ -52,7 +35,8 @@ class FirefoxAddonsBridge extends BridgeAbstract
|
||||
$item['uri'] = $this->getURI();
|
||||
$item['author'] = $author;
|
||||
|
||||
if (preg_match($this->releaseDateRegex, $li->find('div.AddonVersionCard-fileInfo', 0)->plaintext, $match)) {
|
||||
$releaseDateRegex = '/Released ([\w, ]+) - ([\w. ]+)/';
|
||||
if (preg_match($releaseDateRegex, $li->find('div.AddonVersionCard-fileInfo', 0)->plaintext, $match)) {
|
||||
$item['timestamp'] = $match[1];
|
||||
$size = $match[2];
|
||||
}
|
||||
@@ -68,7 +52,8 @@ class FirefoxAddonsBridge extends BridgeAbstract
|
||||
|
||||
$releaseNotes = $this->removeLinkRedirects($li->find('div.AddonVersionCard-releaseNotes', 0));
|
||||
|
||||
if (preg_match($this->xpiFileRegex, $downloadlink, $match)) {
|
||||
$xpiFileRegex = '/([A-Za-z0-9_.-]+)\.xpi$/';
|
||||
if (preg_match($xpiFileRegex, $downloadlink, $match)) {
|
||||
$xpiFilename = $match[0];
|
||||
}
|
||||
|
||||
@@ -110,10 +95,25 @@ EOD;
|
||||
*/
|
||||
private function removeLinkRedirects($html)
|
||||
{
|
||||
$outgoingRegex = '/https:\/\/prod.outgoing\.prod\.webservices\.mozgcp\.net\/v1\/(?:[A-z0-9]+)\//';
|
||||
foreach ($html->find('a') as $a) {
|
||||
$a->href = urldecode(preg_replace($this->outgoingRegex, '', $a->href));
|
||||
$a->href = urldecode(preg_replace($outgoingRegex, '', $a->href));
|
||||
}
|
||||
|
||||
return $html->innertext;
|
||||
}
|
||||
|
||||
public function detectParameters($url)
|
||||
{
|
||||
$params = [];
|
||||
|
||||
// Example: https://addons.mozilla.org/en-US/firefox/addon/ublock-origin
|
||||
$pattern = '/addons\.mozilla\.org\/(?:[\w-]+\/)?firefox\/addon\/([\w-]+)/';
|
||||
if (preg_match($pattern, $url, $matches)) {
|
||||
$params['id'] = $matches[1];
|
||||
return $params;
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
47
bridges/FirefoxReleaseNotesBridge.php
Normal file
47
bridges/FirefoxReleaseNotesBridge.php
Normal file
@@ -0,0 +1,47 @@
|
||||
<?php
|
||||
|
||||
class FirefoxReleaseNotesBridge extends BridgeAbstract
{
    const NAME = 'Firefox Release Notes';
    const URI = 'https://www.mozilla.org/en-US/firefox/';
    const DESCRIPTION = 'Retrieve the latest Firefox release notes.';
    const MAINTAINER = 'tillcash';
    const PARAMETERS = [
        [
            'platform' => [
                'name' => 'Platform',
                'type' => 'list',
                // Values are the URL path segment of each platform; Desktop has none.
                'values' => [
                    'Desktop' => '',
                    'Beta' => 'beta',
                    'Nightly' => 'nightly',
                    'Android' => 'android',
                    'iOS' => 'ios',
                ]
            ]
        ]
    ];

    /**
     * @return string feed name including the selected platform label, if any
     */
    public function getName()
    {
        $platform = $this->getKey('platform');
        return sprintf('Firefox %s Release Notes', $platform ?? '');
    }

    /**
     * Fetch the release notes page of the selected platform and emit it as a
     * single feed item.
     *
     * @throws \Exception when the version or the notes cannot be found on the page
     */
    public function collectData()
    {
        $platform = $this->getKey('platform');
        $segment = (string) $this->getInput('platform');
        // Desktop has an empty segment; skip it so the URL does not contain "//".
        $url = self::URI . ($segment === '' ? '' : $segment . '/') . 'notes/';
        $dom = getSimpleHTMLDOM($url);

        $versionDom = $dom->find('.c-release-version', 0);
        $notesDom = $dom->find('.c-release-notes', 0);
        $dateDom = $dom->find('.c-release-date', 0);
        if (!$versionDom || !$notesDom) {
            throw new \Exception('Unable to find the release notes on ' . $url);
        }

        $version = $versionDom->innertext;

        $item = [
            'content' => $notesDom->innertext,
            'title' => sprintf('Firefox %s %s Release Note', $platform, $version),
            'uri' => $url,
            'uid' => $platform . $version,
        ];
        if ($dateDom) {
            $item['timestamp'] = $dateDom->innertext;
        }

        $this->items[] = $item;
    }
}
|
22
bridges/FliegermagazinBridge.php
Normal file
22
bridges/FliegermagazinBridge.php
Normal file
@@ -0,0 +1,22 @@
|
||||
<?php
|
||||
|
||||
class FliegermagazinBridge extends XPathAbstract
|
||||
{
|
||||
const NAME = 'fliegermagazin';
|
||||
const URI = 'https://www.fliegermagazin.de/news-fuer-piloten/';
|
||||
const DESCRIPTION = 'News für Piloten';
|
||||
const MAINTAINER = 'hleskien';
|
||||
|
||||
const FEED_SOURCE_URL = 'https://www.fliegermagazin.de/news-fuer-piloten/';
|
||||
const XPATH_EXPRESSION_FEED_ICON = './/link[@rel="shortcut icon"]/@href';
|
||||
const XPATH_EXPRESSION_ITEM = '//article[@data-type="post"]';
|
||||
const XPATH_EXPRESSION_ITEM_TITLE = './/h3/a/text()';
|
||||
const XPATH_EXPRESSION_ITEM_CONTENT = './/h3/a/text()';
|
||||
const XPATH_EXPRESSION_ITEM_URI = './/h3/a/@href';
|
||||
const XPATH_EXPRESSION_ITEM_AUTHOR = './/p[@class="author-field"]';
|
||||
// The timestamp can only be determined by loading the article itself
|
||||
//const XPATH_EXPRESSION_ITEM_TIMESTAMP = './/span/i';
|
||||
const XPATH_EXPRESSION_ITEM_ENCLOSURES = './/img/@src';
|
||||
//const XPATH_EXPRESSION_ITEM_CATEGORIES = './/';
|
||||
}
|
||||
|
@@ -29,10 +29,8 @@ class FolhaDeSaoPauloBridge extends FeedExpander
|
||||
]
|
||||
];
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
if ($this->getInput('deep_crawl')) {
|
||||
$articleHTMLContent = getSimpleHTMLDOMCached($item['uri']);
|
||||
if ($articleHTMLContent) {
|
||||
|
@@ -12,12 +12,10 @@ class ForGifsBridge extends FeedExpander
|
||||
$this->collectExpandableDatas('https://forgifs.com/gallery/srss/7');
|
||||
}
|
||||
|
||||
protected function parseItem($feedItem)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($feedItem);
|
||||
|
||||
$content = str_get_html($item['content']);
|
||||
$img = $content->find('img', 0);
|
||||
$dom = str_get_html($item['content']);
|
||||
$img = $dom->find('img', 0);
|
||||
$poster = $img->src;
|
||||
|
||||
// The actual gif is the same path but its id must be decremented by one.
|
||||
@@ -34,7 +32,7 @@ class ForGifsBridge extends FeedExpander
|
||||
$img->width = 'auto';
|
||||
$img->height = 'auto';
|
||||
|
||||
$item['content'] = $content;
|
||||
$item['content'] = (string) $dom;
|
||||
|
||||
return $item;
|
||||
}
|
||||
|
@@ -14,15 +14,15 @@ class FreeCodeCampBridge extends FeedExpander
|
||||
$this->collectExpandableDatas('https://www.freecodecamp.org/news/rss/', 15);
|
||||
}
|
||||
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($newsItem);
|
||||
// $articlePage gets the entire page's contents
|
||||
$articlePage = getSimpleHTMLDOM($newsItem->link);
|
||||
$dom = getSimpleHTMLDOM($item['uri']);
|
||||
|
||||
// figure contains the main article image
|
||||
$article = $articlePage->find('figure', 0);
|
||||
$article = $dom->find('figure', 0);
|
||||
|
||||
// the actual article
|
||||
foreach ($articlePage->find('.post-full-content') as $element) {
|
||||
foreach ($dom->find('.post-full-content') as $element) {
|
||||
$article = $article . $element;
|
||||
}
|
||||
$item['content'] = $article;
|
||||
|
@@ -3,7 +3,7 @@
|
||||
class FreeTelechargerBridge extends BridgeAbstract
|
||||
{
|
||||
const NAME = 'Free-Telecharger';
|
||||
const URI = 'https://www.free-telecharger.live/';
|
||||
const URI = 'https://www.free-telecharger.art/';
|
||||
const DESCRIPTION = 'Suivi de série sur Free-Telecharger';
|
||||
const MAINTAINER = 'sysadminstory';
|
||||
const PARAMETERS = [
|
||||
@@ -12,43 +12,46 @@ class FreeTelechargerBridge extends BridgeAbstract
|
||||
'name' => 'URL de la série',
|
||||
'type' => 'text',
|
||||
'required' => true,
|
||||
'title' => 'URL d\'une série sans le https://www.free-telecharger.live/',
|
||||
'title' => 'URL d\'une série sans le https://www.free-telecharger.art/',
|
||||
'pattern' => 'series.*\.html',
|
||||
'exampleValue' => 'series-vf-hd/145458-the-last-of-us-saison-1-web-dl-720p.html'
|
||||
'exampleValue' => 'series-vf-hd/151432-wolf-saison-1-complete-web-dl-720p.html'
|
||||
],
|
||||
]
|
||||
];
|
||||
const CACHE_TIMEOUT = 3600;
|
||||
private string $showTitle;
|
||||
private string $showTechDetails;
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$html = getSimpleHTMLDOM(self::URI . $this->getInput('url'));
|
||||
$html = getSimpleHTMLDOM(self::URI . $this->getInput('url'));
|
||||
|
||||
// Find all block content of the page
|
||||
$blocks = $html->find('div[class=block1]');
|
||||
// Find all block content of the page
|
||||
$blocks = $html->find('div[class=block1]');
|
||||
|
||||
// Global Infos block
|
||||
$infosBlock = $blocks[0];
|
||||
// Links block
|
||||
$linksBlock = $blocks[2];
|
||||
// Global Infos block
|
||||
$infosBlock = $blocks[0];
|
||||
// Links block
|
||||
$linksBlock = $blocks[2];
|
||||
|
||||
// Extract Global Show infos
|
||||
$this->showTitle = trim($infosBlock->find('div[class=titre1]', 0)->find('font', 0)->plaintext);
|
||||
$this->showTechDetails = trim($infosBlock->find('div[align=center]', 0)->find('b', 0)->plaintext);
|
||||
// Extract Global Show infos
|
||||
$this->showTitle = trim($infosBlock->find('div[class=titre1]', 0)->find('font', 0)->plaintext);
|
||||
$this->showTechDetails = trim($infosBlock->find('div[align=center]', 0)->find('b', 0)->plaintext);
|
||||
|
||||
|
||||
|
||||
// Get Episodes names and links
|
||||
$episodes = $linksBlock->find('div[id=link]', 0)->find('font[color=#ff6600]');
|
||||
$links = $linksBlock->find('div[id=link]', 0)->find('a');
|
||||
// Get Episodes names and links
|
||||
$episodes = $linksBlock->find('div[id=link]', 0)->find('font[color=#e93100]');
|
||||
$links = $linksBlock->find('div[id=link]', 0)->find('a');
|
||||
|
||||
foreach ($episodes as $index => $episode) {
|
||||
$item = []; // Create an empty item
|
||||
$item['title'] = $this->showTitle . ' ' . $this->showTechDetails . ' - ' . ltrim(trim($episode->plaintext), '-');
|
||||
$item['uri'] = $links[$index]->href;
|
||||
$item['content'] = '<a href="' . $item['uri'] . '">' . $item['title'] . '</a>';
|
||||
$item['uid'] = hash('md5', $item['uri']);
|
||||
$item = []; // Create an empty item
|
||||
$item['title'] = $this->showTitle . ' ' . $this->showTechDetails . ' - ' . ltrim(trim($episode->plaintext), '-');
|
||||
$item['uri'] = $links[$index]->href;
|
||||
$item['content'] = '<a href="' . $item['uri'] . '">' . $item['title'] . '</a>';
|
||||
$item['uid'] = hash('md5', $item['uri']);
|
||||
|
||||
$this->items[] = $item; // Add this item to the list
|
||||
$this->items[] = $item; // Add this item to the list
|
||||
}
|
||||
}
|
||||
|
||||
@@ -57,7 +60,7 @@ class FreeTelechargerBridge extends BridgeAbstract
|
||||
switch ($this->queriedContext) {
|
||||
case 'Suivi de publication de série':
|
||||
return $this->showTitle . ' ' . $this->showTechDetails . ' - ' . self::NAME;
|
||||
break;
|
||||
break;
|
||||
default:
|
||||
return self::NAME;
|
||||
}
|
||||
@@ -68,7 +71,7 @@ class FreeTelechargerBridge extends BridgeAbstract
|
||||
switch ($this->queriedContext) {
|
||||
case 'Suivi de publication de série':
|
||||
return self::URI . $this->getInput('url');
|
||||
break;
|
||||
break;
|
||||
default:
|
||||
return self::URI;
|
||||
}
|
||||
@@ -76,14 +79,14 @@ class FreeTelechargerBridge extends BridgeAbstract
|
||||
|
||||
public function detectParameters($url)
|
||||
{
|
||||
// Example: https://www.free-telecharger.live/series-vf-hd/145458-the-last-of-us-saison-1-web-dl-720p.html
|
||||
// Example: https://www.free-telecharger.art/series-vf-hd/151432-wolf-saison-1-complete-web-dl-720p.html
|
||||
|
||||
$params = [];
|
||||
$regex = '/^https:\/\/www.*\.free-telecharger\.live\/(series.*\.html)/';
|
||||
$regex = '/^https:\/\/www.*\.free-telecharger\.art\/(series.*\.html)/';
|
||||
if (preg_match($regex, $url, $matches) > 0) {
|
||||
$params['context'] = 'Suivi de publication de série';
|
||||
$params['url'] = urldecode($matches[1]);
|
||||
return $params;
|
||||
$params['context'] = 'Suivi de publication de série';
|
||||
$params['url'] = urldecode($matches[1]);
|
||||
return $params;
|
||||
}
|
||||
|
||||
return null;
|
||||
|
@@ -85,13 +85,12 @@ class FuturaSciencesBridge extends FeedExpander
|
||||
$this->collectExpandableDatas($url, 10);
|
||||
}
|
||||
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($newsItem);
|
||||
$item['uri'] = str_replace('#xtor%3DRSS-8', '', $item['uri']);
|
||||
$article = getSimpleHTMLDOMCached($item['uri']);
|
||||
$item['content'] = $this->extractArticleContent($article);
|
||||
$author = $this->extractAuthor($article);
|
||||
$dom = getSimpleHTMLDOMCached($item['uri']);
|
||||
$item['content'] = $this->extractArticleContent($dom);
|
||||
$author = $this->extractAuthor($dom);
|
||||
if (!empty($author)) {
|
||||
$item['author'] = $author;
|
||||
}
|
||||
|
@@ -20,13 +20,14 @@ class GatesNotesBridge extends BridgeAbstract
|
||||
$apiUrl = self::URI . $api_endpoint . http_build_query($params);
|
||||
|
||||
$rawContent = getContents($apiUrl);
|
||||
$cleanedContent = str_replace([
|
||||
'<string xmlns="http://schemas.microsoft.com/2003/10/Serialization/">',
|
||||
'</string>',
|
||||
], '', $rawContent);
|
||||
$cleanedContent = trim($rawContent, '"');
|
||||
$cleanedContent = str_replace('\r\n', "\n", $cleanedContent);
|
||||
$cleanedContent = stripslashes($cleanedContent);
|
||||
|
||||
// The content is actually a json between quotes with \r\n inserted
|
||||
$json = Json::decode($cleanedContent, false);
|
||||
if (is_string($json)) {
|
||||
throw new \Exception('wtf? ' . $json);
|
||||
}
|
||||
|
||||
foreach ($json as $article) {
|
||||
$item = [];
|
||||
@@ -98,7 +99,7 @@ class GatesNotesBridge extends BridgeAbstract
|
||||
}
|
||||
$article_body = sanitize($article_body->innertext);
|
||||
|
||||
$content = $top_description . $hero_image . $article_body;
|
||||
$content = $top_description . ($hero_image ?? '') . $article_body;
|
||||
|
||||
return $content;
|
||||
}
|
||||
|
@@ -33,7 +33,15 @@ class GettrBridge extends BridgeAbstract
|
||||
$user,
|
||||
min($this->getInput('limit'), 20)
|
||||
);
|
||||
$data = json_decode(getContents($api), false);
|
||||
try {
|
||||
$json = getContents($api);
|
||||
} catch (HttpException $e) {
|
||||
if ($e->getCode() === 400 && str_contains($e->response->getBody(), 'E_USER_NOTFOUND')) {
|
||||
throw new \Exception('User not found: ' . $user);
|
||||
}
|
||||
throw $e;
|
||||
}
|
||||
$data = json_decode($json, false);
|
||||
|
||||
foreach ($data->result->aux->post as $post) {
|
||||
$this->items[] = [
|
||||
|
@@ -8,10 +8,8 @@ class GizmodoBridge extends FeedExpander
|
||||
const CACHE_TIMEOUT = 1800; // 30min
|
||||
const DESCRIPTION = 'Returns the newest posts from Gizmodo.';
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$html = getSimpleHTMLDOMCached($item['uri']);
|
||||
|
||||
$html = defaultLinkTo($html, $this->getURI());
|
||||
|
@@ -63,9 +63,8 @@ class GolemBridge extends FeedExpander
|
||||
);
|
||||
}
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
$item['content'] ??= '';
|
||||
$uri = $item['uri'];
|
||||
|
||||
@@ -83,11 +82,6 @@ class GolemBridge extends FeedExpander
|
||||
// URI without RSS feed reference
|
||||
$item['uri'] = $articlePage->find('head meta[name="twitter:url"]', 0)->content;
|
||||
|
||||
$author = $articlePage->find('article header .authors .authors__name', 0);
|
||||
if ($author) {
|
||||
$item['author'] = $author->plaintext;
|
||||
}
|
||||
|
||||
$categories = $articlePage->find('ul.tags__list li');
|
||||
foreach ($categories as $category) {
|
||||
$trimmedcategories[] = trim(html_entity_decode($category->plaintext));
|
||||
@@ -122,9 +116,6 @@ class GolemBridge extends FeedExpander
|
||||
// reload html, as remove() is buggy
|
||||
$article = str_get_html($article->outertext);
|
||||
|
||||
if ($pageHeader = $article->find('header.paged-cluster-header h1', 0)) {
|
||||
$item .= $pageHeader;
|
||||
}
|
||||
|
||||
$header = $article->find('header', 0);
|
||||
foreach ($header->find('p, figure') as $element) {
|
||||
@@ -138,7 +129,7 @@ class GolemBridge extends FeedExpander
|
||||
$img->src = $img->getAttribute('data-src-full');
|
||||
}
|
||||
|
||||
foreach ($content->find('p, h1, h3, img[src*="."]') as $element) {
|
||||
foreach ($content->find('p, h1, h2, h3, img[src*="."]') as $element) {
|
||||
$item .= $element;
|
||||
}
|
||||
|
||||
|
@@ -21,30 +21,22 @@ class GooglePlayStoreBridge extends BridgeAbstract
|
||||
]
|
||||
]];
|
||||
|
||||
const INFORMATION_MAP = [
|
||||
'Updated' => 'timestamp',
|
||||
'Current Version' => 'title',
|
||||
'Offered By' => 'author'
|
||||
];
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$appuri = static::URI . '/details?id=' . $this->getInput('id');
|
||||
$html = getSimpleHTMLDOM($appuri);
|
||||
$id = $this->getInput('id');
|
||||
$url = 'https://play.google.com/store/apps/details?id=' . $id;
|
||||
$html = getSimpleHTMLDOM($url);
|
||||
|
||||
$updatedAtElement = $html->find('div.TKjAsc div', 2);
|
||||
// Updated onSep 27, 2023
|
||||
$updatedAt = $updatedAtElement->plaintext;
|
||||
$description = $html->find('div.bARER', 0);
|
||||
|
||||
$item = [];
|
||||
$item['uri'] = $appuri;
|
||||
$item['content'] = $html->find('div[itemprop=description]', 1)->innertext;
|
||||
|
||||
// Find other fields from Additional Information section
|
||||
foreach ($html->find('.hAyfc') as $info) {
|
||||
$index = self::INFORMATION_MAP[$info->first_child()->plaintext] ?? null;
|
||||
if (is_null($index)) {
|
||||
continue;
|
||||
}
|
||||
$item[$index] = $info->children(1)->plaintext;
|
||||
}
|
||||
|
||||
$item['uri'] = $url;
|
||||
$item['title'] = $id . ' ' . $updatedAt;
|
||||
$item['content'] = $description->innertext ?? '';
|
||||
$item['uid'] = 'GooglePlayStoreBridge/' . $updatedAt;
|
||||
$this->items[] = $item;
|
||||
}
|
||||
|
||||
|
@@ -2,7 +2,7 @@
|
||||
|
||||
class GoogleScholarBridge extends BridgeAbstract
|
||||
{
|
||||
const NAME = 'Google Scholar v2';
|
||||
const NAME = 'Google Scholar';
|
||||
const URI = 'https://scholar.google.com/';
|
||||
const DESCRIPTION = 'Search for publications or follow authors on Google Scholar.';
|
||||
const MAINTAINER = 'nicholasmccarthy';
|
||||
@@ -193,6 +193,11 @@ class GoogleScholarBridge extends BridgeAbstract
|
||||
$articleUrl = $articleTitleElement->find('a', 0)->href;
|
||||
$articleTitle = $articleTitleElement->plaintext;
|
||||
|
||||
// Break the loop if 'Check for Updates' is found in the article title
|
||||
if (strpos($articleTitle, 'Check for updates') !== false) {
|
||||
break;
|
||||
}
|
||||
|
||||
$articleDateElement = $publication->find('div[class="gs_a"]', 0);
|
||||
$articleDate = $articleDateElement ? $articleDateElement->plaintext : '';
|
||||
|
||||
|
@@ -9,15 +9,13 @@ class HardwareInfoBridge extends FeedExpander
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$this->collectExpandableDatas('https://nl.hardware.info/updates/all.rss', 20);
|
||||
$this->collectExpandableDatas('https://nl.hardware.info/updates/all.rss', 10);
|
||||
}
|
||||
|
||||
protected function parseItem($feedItem)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($feedItem);
|
||||
|
||||
//get full article
|
||||
$articlePage = getSimpleHTMLDOMCached($feedItem->link);
|
||||
$itemUrl = $item['uri'];
|
||||
$articlePage = getSimpleHTMLDOMCached($itemUrl);
|
||||
|
||||
$article = $articlePage->find('div.article__content', 0);
|
||||
|
||||
|
@@ -125,9 +125,8 @@ class HeiseBridge extends FeedExpander
|
||||
);
|
||||
}
|
||||
|
||||
protected function parseItem($feedItem)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($feedItem);
|
||||
$sessioncookie = $this->getInput('sessioncookie');
|
||||
|
||||
// strip rss parameter
|
||||
|
@@ -3273,6 +3273,8 @@ class HotUKDealsBridge extends PepperBridgeAbstract
|
||||
'context-group' => 'Deals per group',
|
||||
'context-talk' => 'Discussion Monitoring',
|
||||
'uri-group' => 'tag/',
|
||||
'uri-deal' => 'deals/',
|
||||
'uri-merchant' => 'search/deals?merchant-id=',
|
||||
'request-error' => 'Could not request HotUKDeals',
|
||||
'thread-error' => 'Unable to determine the thread ID. Check the URL you entered',
|
||||
'no-results' => 'Ooops, looks like we could',
|
||||
@@ -3315,6 +3317,7 @@ class HotUKDealsBridge extends PepperBridgeAbstract
|
||||
'and '
|
||||
],
|
||||
'date-prefixes' => [
|
||||
'Posted ',
|
||||
'Found ',
|
||||
'Refreshed ',
|
||||
'Made hot '
|
||||
|
@@ -10,17 +10,14 @@ class IGNBridge extends FeedExpander
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$this->collectExpandableDatas('http://feeds.ign.com/ign/all', 15);
|
||||
$this->collectExpandableDatas('http://feeds.ign.com/ign/all', 2);
|
||||
}
|
||||
|
||||
// IGNs feed is both hidden and incomplete. This bridge tries to fix this.
|
||||
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($newsItem);
|
||||
|
||||
// $articlePage gets the entire page's contents
|
||||
$articlePage = getSimpleHTMLDOM($newsItem->link);
|
||||
$articlePage = getSimpleHTMLDOM($item['uri']);
|
||||
|
||||
// List of BS elements
|
||||
$uselessElements = [
|
||||
@@ -33,7 +30,7 @@ class IGNBridge extends FeedExpander
|
||||
'.jsx-4213937408',
|
||||
'.commerce-container',
|
||||
'.widget-container',
|
||||
'.newsletter-signup-button'
|
||||
'.newsletter-signup-button',
|
||||
];
|
||||
|
||||
// Remove useless elements
|
||||
|
185
bridges/IdealoBridge.php
Normal file
185
bridges/IdealoBridge.php
Normal file
@@ -0,0 +1,185 @@
|
||||
<?php
|
||||
|
||||
/**
 * Tracks the price of one idealo.de product page.
 *
 * Two modes are supported:
 *  - Pricealarm: when MaxPriceNew and/or MaxPriceUsed is set, an item is emitted
 *    whenever the respective price changed and is below the configured maximum.
 *  - Priceupdate: when no maximum is set, an item is emitted for every price change
 *    (new and/or used, depending on the Exclude* checkboxes).
 *
 * The last seen price texts are kept in the bridge cache, keyed by the product link.
 */
class IdealoBridge extends BridgeAbstract
{
    const NAME = 'Idealo.de Bridge';
    const URI = 'https://www.idealo.de';
    const DESCRIPTION = 'Tracks the price for a product on idealo.de. Pricealarm if specific price is set';
    const MAINTAINER = 'SebLaus';
    const CACHE_TIMEOUT = 60 * 30; // 30 min
    const PARAMETERS = [
        [
            'Link' => [
                'name' => 'Idealo.de Link to productpage',
                'required' => true,
                'exampleValue' => 'https://www.idealo.de/preisvergleich/OffersOfProduct/202007367_-s7-pro-ultra-roborock.html'
            ],
            'ExcludeNew' => [
                'name' => 'Priceupdate: Do not track new items',
                'type' => 'checkbox',
                'value' => 'c'
            ],
            'ExcludeUsed' => [
                'name' => 'Priceupdate: Do not track used items',
                'type' => 'checkbox',
                'value' => 'uc'
            ],
            'MaxPriceNew' => [
                'name' => 'Pricealarm: Maximum price for new Product',
                'type' => 'number'
            ],
            'MaxPriceUsed' => [
                'name' => 'Pricealarm: Maximum price for used Product',
                'type' => 'number'
            ],
        ]
    ];

    public function getIcon()
    {
        return 'https://cdn.idealo.com/storage/ids-assets/ico/favicon.ico';
    }

    /**
     * Fetches the product page, compares the current prices with the cached ones
     * and emits feed items for price alarms / price updates.
     *
     * @throws \Exception when the product name cannot be found on the page
     */
    public function collectData()
    {
        // Needs header with user-agent to function properly.
        $header = [
            'user-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2.1 Safari/605.1.15'
        ];

        $link = $this->getInput('Link');
        $html = getSimpleHTMLDOM($link, $header);

        // Get Productname
        $titleobj = $html->find('.oopStage-title', 0);
        $titleSpan = $titleobj ? $titleobj->find('span', 0) : null;
        if (!$titleSpan) {
            throw new \Exception('Unable to find the product name on ' . $link);
        }
        $Productname = $titleSpan->plaintext;

        // Create product specific Cache Keys with the link
        $KeyNEW = $link . 'NEW';
        $KeyUSED = $link . 'USED';

        // Load previous Price (null when nothing has been cached yet)
        $OldPriceNew = $this->loadCacheValue($KeyNEW);
        $OldPriceUsed = $this->loadCacheValue($KeyUSED);

        // First condition button is "new", second one is "used".
        // Either may be missing, in which case the price stays null.
        $PriceNew = $this->extractPriceText($html, 0);
        $PriceUsed = $this->extractPriceText($html, 1);

        // Numeric values are used for every greater/less comparison; the raw
        // texts (e.g. "1.234,00 €") are only used for display and change detection.
        $newValue = $this->parsePrice($PriceNew);
        $usedValue = $this->parsePrice($PriceUsed);
        $oldNewValue = $this->parsePrice($OldPriceNew);
        $oldUsedValue = $this->parsePrice($OldPriceUsed);

        $newChanged = $PriceNew != $OldPriceNew;
        $usedChanged = $PriceUsed != $OldPriceUsed;

        // Only continue if a price has changed
        if ($newChanged || $usedChanged) {
            $maxPriceNew = $this->getInput('MaxPriceNew');
            $maxPriceUsed = $this->getInput('MaxPriceUsed');
            $hasMaxNew = $maxPriceNew != '';
            $hasMaxUsed = $maxPriceUsed != '';

            // Generate Content
            $content = '';

            if ($newValue !== null && $newValue > 1) {
                $content .= "<p><b>Price New:</b><br>$PriceNew</p>";
                $content .= "<p><b>Price New before:</b><br>$OldPriceNew</p>";
            }

            if ($hasMaxNew) {
                $content .= sprintf('<p><b>Max Price New:</b><br>%s,00 €</p>', $maxPriceNew);
            }

            if ($usedValue !== null && $usedValue > 1) {
                $content .= "<p><b>Price Used:</b><br>$PriceUsed</p>";
                $content .= "<p><b>Price Used before:</b><br>$OldPriceUsed</p>";
            }

            if ($hasMaxUsed) {
                $content .= sprintf('<p><b>Max Price Used:</b><br>%s,00 €</p>', $maxPriceUsed);
            }

            // Get Product Image (optional)
            $imageElement = $html->find('.datasheet-cover-image', 0);
            if ($imageElement) {
                $content .= sprintf('<img src="%s">', $imageElement->src);
            }

            // day.month.year hour:minute
            $now = date('d.m.Y H:i');

            $Pricealarm = 'Pricealarm %s: %s %s %s';

            // Currently under Max new price
            if ($hasMaxNew && $newValue !== null && $newValue < (float) $maxPriceNew) {
                $this->addPriceItem(sprintf($Pricealarm, 'New', $PriceNew, $Productname, $now), $link, $content);
            }

            // Currently under Max used price
            if ($hasMaxUsed && $usedValue !== null && $usedValue < (float) $maxPriceUsed) {
                $this->addPriceItem(sprintf($Pricealarm, 'Used', $PriceUsed, $Productname, $now), $link, $content);
            }

            // General Priceupdate
            if (!$hasMaxUsed && !$hasMaxNew) {
                $trackNew = !$this->getInput('ExcludeNew');
                $trackUsed = !$this->getInput('ExcludeUsed');

                // check if a relevant pricechange happened
                if (($trackNew && $newChanged) || ($trackUsed && $usedChanged)) {
                    $title = 'Priceupdate! ';

                    if ($trackNew) {
                        $title .= $this->trendArrow('NEW', $newValue, $oldNewValue);
                    }

                    if ($trackUsed) {
                        $title .= $this->trendArrow('USED', $usedValue, $oldUsedValue);
                    }

                    $title .= $Productname;
                    $title .= ' ';
                    $title .= $now;

                    $this->addPriceItem($title, $link, $content);
                }
            }
        }

        // Save current price
        $this->saveCacheValue($KeyNEW, $PriceNew);
        $this->saveCacheValue($KeyUSED, $PriceUsed);
    }

    /**
     * Returns the price text of the n-th condition button, or null when the
     * button (or its <strong> price node) is not present on the page.
     */
    private function extractPriceText($html, $index)
    {
        $button = $html->find('.oopStage-conditionButton-wrapper-text', $index);
        if (!$button) {
            return null;
        }
        $strong = $button->find('strong', 0);
        return $strong ? $strong->plaintext : null;
    }

    /**
     * Converts a German-formatted price text such as "1.234,56 €" to a float.
     * Returns null when the text is empty or contains no number.
     */
    private function parsePrice($text)
    {
        if ($text === null || $text === false || $text === '') {
            return null;
        }
        // Decode entities first so that e.g. "&#160;" does not contribute digits
        $text = html_entity_decode((string) $text, ENT_QUOTES | ENT_HTML5, 'UTF-8');
        if (!preg_match('/\d[\d.]*(?:,\d+)?/', $text, $matches)) {
            return null;
        }
        // Drop thousands separators, then turn the decimal comma into a dot
        return (float) str_replace(['.', ','], ['', '.'], $matches[0]);
    }

    /**
     * Builds the "LABEL:⬇ " / "LABEL:⬆ " title fragment for a price movement.
     * Returns an empty string when either price is unknown or nothing moved.
     */
    private function trendArrow($label, $current, $previous)
    {
        if ($current === null || $previous === null) {
            return '';
        }
        if ($current < $previous) {
            return $label . ':⬇ '; // Arrow Down Emoji
        }
        if ($current > $previous) {
            return $label . ':⬆ '; // Arrow Up Emoji
        }
        return '';
    }

    /**
     * Appends one feed item; the uid is derived from the (timestamped) title.
     */
    private function addPriceItem($title, $link, $content)
    {
        $this->items[] = [
            'title' => $title,
            'uri' => $link,
            'content' => $content,
            'uid' => md5($title)
        ];
    }
}
|
@@ -36,15 +36,12 @@ class ImgsedBridge extends BridgeAbstract
|
||||
],
|
||||
]
|
||||
];
|
||||
|
||||
public function getURI()
|
||||
{
|
||||
if (!is_null($this->getInput('u'))) {
|
||||
return urljoin(self::URI, '/' . $this->getInput('u') . '/');
|
||||
}
|
||||
|
||||
return parent::getURI();
|
||||
}
|
||||
const TEST_DETECT_PARAMETERS = [
|
||||
'https://www.instagram.com/instagram/' => ['context' => 'Username', 'u' => 'instagram', 'post' => 'on', 'story' => 'on', 'tagged' => 'on'],
|
||||
'https://instagram.com/instagram/' => ['context' => 'Username', 'u' => 'instagram', 'post' => 'on', 'story' => 'on', 'tagged' => 'on'],
|
||||
'https://imgsed.com/instagram/' => ['context' => 'Username', 'u' => 'instagram', 'post' => 'on', 'story' => 'on', 'tagged' => 'on'],
|
||||
'https://www.imgsed.com/instagram/' => ['context' => 'Username', 'u' => 'instagram', 'post' => 'on', 'story' => 'on', 'tagged' => 'on'],
|
||||
];
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
@@ -94,9 +91,6 @@ class ImgsedBridge extends BridgeAbstract
|
||||
$isMoreContent = (bool) $post->find('svg', 0);
|
||||
$moreContentNote = $isMoreContent ? '<p><i>(multiple images and/or videos)</i></p>' : '';
|
||||
|
||||
|
||||
|
||||
|
||||
$this->items[] = [
|
||||
'uri' => $url,
|
||||
'author' => $author,
|
||||
@@ -208,20 +202,37 @@ HTML,
|
||||
}
|
||||
}
|
||||
|
||||
// Parse the date and convert it to a timestamp, even when it is a relative date
|
||||
private function parseDate($content)
|
||||
{
|
||||
// Parse the date and convert it to a timestamp, even when it is a relative date
|
||||
$date = date_create();
|
||||
$dateString = str_replace(' ago', '', $content);
|
||||
|
||||
// Content trimmed to be sure that the "article" is at the beginning of the string and remove "ago" to make it a valid PHP date interval
|
||||
$dateString = trim(str_replace(' ago', '', $content));
|
||||
|
||||
// Replace the article "an" or "a" by the number "1" to be a valid PHP date interval
|
||||
$dateString = preg_replace('/^((an|a) )/m', '1 ', $dateString);
|
||||
|
||||
$relativeDate = date_interval_create_from_date_string($dateString);
|
||||
if ($relativeDate) {
|
||||
date_sub($date, $relativeDate);
|
||||
// As the relative interval has the precision of a day for date older than 24 hours, we can remove the hour of the date, as it is not relevant
|
||||
date_time_set($date, 0, 0, 0, 0);
|
||||
} else {
|
||||
$this->logger->info(sprintf('Unable to parse date string: %s', $dateString));
|
||||
}
|
||||
return date_format($date, 'r');
|
||||
}
|
||||
|
||||
public function getURI()
|
||||
{
|
||||
if (!is_null($this->getInput('u'))) {
|
||||
return urljoin(self::URI, '/' . $this->getInput('u') . '/');
|
||||
}
|
||||
|
||||
return parent::getURI();
|
||||
}
|
||||
|
||||
private function convertURLToInstagram($url)
|
||||
{
|
||||
return str_replace(self::URI, self::INSTAGRAMURI, $url);
|
||||
@@ -244,7 +255,13 @@ HTML,
|
||||
if ($this->getInput('tagged')) {
|
||||
$types[] = 'Tags';
|
||||
}
|
||||
$typesText = $types[0];
|
||||
|
||||
// If no content type is selected, this bridge does nothing, so we return an error
|
||||
if (count($types) == 0) {
|
||||
returnClientError('You must select at least one of the content type : Post, Stories or Tags !');
|
||||
}
|
||||
$typesText = $types[0] ?? '';
|
||||
|
||||
if (count($types) > 1) {
|
||||
for ($i = 1; $i < count($types) - 1; $i++) {
|
||||
$typesText .= ', ' . $types[$i];
|
||||
@@ -262,10 +279,9 @@ HTML,
|
||||
$params = [
|
||||
'post' => 'on',
|
||||
'story' => 'on',
|
||||
'tagged' => 'on'
|
||||
'tagged' => 'on',
|
||||
];
|
||||
$regex = '/^http(s|):\/\/((www\.|)(instagram.com)\/([a-zA-Z0-9_\.]{1,30})\/(reels\/|tagged\/|)
|
||||
|(www\.|)(imgsed.com)\/(stories\/|tagged\/|)([a-zA-Z0-9_\.]{1,30})\/)/';
|
||||
$regex = '/^http(s|):\/\/((www\.|)(instagram.com)\/([a-zA-Z0-9_\.]{1,30})(\/reels\/|\/tagged\/|\/|)|(www\.|)(imgsed.com)\/(stories\/|tagged\/|)([a-zA-Z0-9_\.]{1,30})\/)/';
|
||||
if (preg_match($regex, $url, $matches) > 0) {
|
||||
$params['context'] = 'Username';
|
||||
// Extract detected domain using the regex
|
||||
@@ -273,7 +289,7 @@ HTML,
|
||||
if ($domain == 'imgsed.com') {
|
||||
$params['u'] = $matches[10];
|
||||
return $params;
|
||||
} else if ($domain == 'instagram.com') {
|
||||
} elseif ($domain == 'instagram.com') {
|
||||
$params['u'] = $matches[5];
|
||||
return $params;
|
||||
} else {
|
||||
|
@@ -121,6 +121,9 @@ class InstagramBridge extends BridgeAbstract
|
||||
$directLink = !is_null($this->getInput('direct_links')) && $this->getInput('direct_links');
|
||||
|
||||
$data = $this->getInstagramJSON($this->getURI());
|
||||
if (!$data) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!is_null($this->getInput('u'))) {
|
||||
$userMedia = $data->data->user->edge_owner_to_timeline_media->edges;
|
||||
@@ -286,9 +289,11 @@ class InstagramBridge extends BridgeAbstract
|
||||
$html = getContents($uri);
|
||||
$scriptRegex = '/window\._sharedData = (.*);<\/script>/';
|
||||
|
||||
preg_match($scriptRegex, $html, $matches, PREG_OFFSET_CAPTURE, 0);
|
||||
|
||||
return json_decode($matches[1][0]);
|
||||
$ret = preg_match($scriptRegex, $html, $matches, PREG_OFFSET_CAPTURE);
|
||||
if ($ret) {
|
||||
return json_decode($matches[1][0]);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -201,7 +201,7 @@ class ItakuBridge extends BridgeAbstract
|
||||
'rating_e' => $this->getInput('rating_e')
|
||||
];
|
||||
|
||||
$tag_arr = explode(' ', $this->getInput('tags'));
|
||||
$tag_arr = explode(' ', $this->getInput('tags') ?? '');
|
||||
foreach ($tag_arr as $str) {
|
||||
switch ($str[0]) {
|
||||
case '-':
|
||||
@@ -280,7 +280,7 @@ class ItakuBridge extends BridgeAbstract
|
||||
$opt['range'] = '';
|
||||
$user_id = $this->getInput('user_id') ?? $this->getOwnerID($this->getInput('user'));
|
||||
|
||||
$data = $this->getFeed(
|
||||
$data = $this->getFeedData(
|
||||
$opt,
|
||||
$user_id
|
||||
);
|
||||
@@ -289,7 +289,7 @@ class ItakuBridge extends BridgeAbstract
|
||||
if ($this->queriedContext === 'Home feed') {
|
||||
$opt['order'] = $this->getInput('order');
|
||||
$opt['range'] = $this->getInput('range');
|
||||
$data = $this->getFeed($opt);
|
||||
$data = $this->getFeedData($opt);
|
||||
}
|
||||
|
||||
foreach ($data['results'] as $record) {
|
||||
@@ -409,7 +409,7 @@ class ItakuBridge extends BridgeAbstract
|
||||
return $this->getData($url, false, true);
|
||||
}
|
||||
|
||||
private function getFeed(array $opt, $ownerID = null)
|
||||
private function getFeedData(array $opt, $ownerID = null)
|
||||
{
|
||||
$url = self::URI . "/api/feed/?date_range={$opt['range']}&ordering={$opt['order']}&page=1&page_size=30&format=json";
|
||||
|
||||
@@ -446,6 +446,9 @@ class ItakuBridge extends BridgeAbstract
|
||||
|
||||
private function getPost($id, array $metadata = null)
|
||||
{
|
||||
if (isset($metadata) && sizeof($metadata['gallery_images']) < $metadata['num_images']) {
|
||||
$metadata = null; //force re-fetch of metadata
|
||||
}
|
||||
$uri = self::URI . '/posts/' . $id;
|
||||
$url = self::URI . '/api/posts/' . $id . '/?format=json';
|
||||
$data = $metadata ?? $this->getData($url, true, true)
|
||||
|
175
bridges/KleinanzeigenBridge.php
Normal file
175
bridges/KleinanzeigenBridge.php
Normal file
@@ -0,0 +1,175 @@
|
||||
<?php
|
||||
|
||||
/**
 * Lists classified ads from kleinanzeigen.de, either for a search
 * (query / category / location / price range) or for one seller profile.
 */
class KleinanzeigenBridge extends BridgeAbstract
{
    const MAINTAINER = 'knrdl';
    const NAME = 'Kleinanzeigen Bridge';
    const URI = 'https://www.kleinanzeigen.de';
    const CACHE_TIMEOUT = 3600; // 1h
    const DESCRIPTION = '(ebay) Kleinanzeigen';

    const PARAMETERS = [
        'By search' => [
            'query' => [
                'name' => 'query',
                'required' => false,
                'title' => 'query term',
            ],
            'category' => [
                'name' => 'category',
                'required' => false,
                'title' => 'search category, e.g. "Damenschuhe" or "Notebooks"'
            ],
            'location' => [
                'name' => 'location',
                'required' => false,
                'title' => 'e.g. Berlin',
            ],
            'radius' => [
                'name' => 'radius',
                'required' => false,
                'type' => 'number',
                'title' => 'location radius in kilometers',
                'defaultValue' => 10,
            ],
            'minprice' => [
                'name' => 'minimum price',
                'required' => false,
                'type' => 'number',
                'title' => 'in euros'
            ],
            'maxprice' => [
                'name' => 'maximum price',
                'required' => false,
                'type' => 'number',
                'title' => 'in euros'
            ],
            'pages' => [
                'name' => 'pages',
                'required' => true,
                'type' => 'number',
                'title' => 'how many pages to fetch',
                'defaultValue' => 2,
            ]
        ],
        'By profile' => [
            'userid' => [
                'name' => 'user id',
                'required' => true,
                'type' => 'number',
                'exampleValue' => 12345678
            ],
            'pages' => [
                'name' => 'pages',
                'required' => true,
                'type' => 'number',
                'title' => 'how many pages to fetch',
                'defaultValue' => 2,
            ]
        ],
    ];

    public function getIcon()
    {
        return 'https://www.kleinanzeigen.de/favicon.ico';
    }

    /**
     * Feed name depending on the queried context.
     */
    public function getName()
    {
        switch ($this->queriedContext) {
            case 'By profile':
                return 'Kleinanzeigen Profil';
            case 'By search':
                return 'Kleinanzeigen ' . $this->getInput('query') . ' ' . $this->getInput('category') . ' ' . $this->getInput('location');
            default:
                return parent::getName();
        }
    }

    /**
     * Fetches up to "pages" result pages for the active context and adds
     * every ad found on them as a feed item.
     */
    public function collectData()
    {
        $pages = (int) $this->getInput('pages');

        if ($this->queriedContext === 'By profile') {
            $userId = $this->getInput('userid');
            for ($page = 1; $page <= $pages; $page++) {
                $html = getSimpleHTMLDOM(self::URI . '/s-bestandsliste.html?userId=' . $userId . '&pageNum=' . $page . '&sortingField=SORTING_DATE');

                $ads = $html->find('article.aditem');
                // an empty page means we ran past the last page of the profile
                if (!$ads) {
                    break;
                }
                foreach ($ads as $element) {
                    $this->addItem($element);
                }
            }
        }

        if ($this->queriedContext === 'By search') {
            $categoryId = $this->findCategoryId();
            // Parameters that do not change between pages.
            // NOTE: PHP's "||" yields a boolean, so the fallback must use "?:"
            // to actually pass the configured radius through.
            $baseQuery = [
                'keywords' => $this->getInput('query'),
                'locationStr' => $this->getInput('location'),
                'locationId' => '',
                'radius' => $this->getInput('radius') ?: '0',
                'sortingField' => 'SORTING_DATE',
                'categoryId' => $categoryId,
            ];
            $priceQuery = [
                'maxPrice' => $this->getInput('maxprice'),
                'minPrice' => $this->getInput('minprice')
            ];

            for ($page = 1; $page <= $pages; $page++) {
                $searchUrl = self::URI . '/s-suchanfrage.html?' . http_build_query(
                    $baseQuery + ['pageNum' => $page] + $priceQuery
                );

                $html = getSimpleHTMLDOM($searchUrl);

                // end of list if returned page is not the expected one
                $current = $html->find('.pagination-current', 0);
                if ($current) {
                    if (trim($current->plaintext) != $page) {
                        break;
                    }
                } elseif ($page > 1) {
                    // no pagination marker at all: only trust the first page
                    break;
                }

                foreach ($html->find('ul#srchrslt-adtable article.aditem') as $element) {
                    $this->addItem($element);
                }
            }
        }
    }

    /**
     * Converts one <article class="aditem"> element into a feed item.
     */
    private function addItem($element)
    {
        $item = [];

        $item['uid'] = $element->getAttribute('data-adid');
        $item['uri'] = self::URI . $element->getAttribute('data-href');

        $titleElement = $element->find('h2', 0);
        $item['title'] = $titleElement ? $titleElement->plaintext : '';

        $dateElement = $element->find('div.aditem-main--top--right', 0);
        if ($dateElement) {
            $item['timestamp'] = $dateElement->plaintext;
        }

        //enhance img quality: swap the thumbnail rules for the larger rendition
        $imgElement = $element->find('img', 0);
        $imgUrl = str_replace(
            ['rule=$_35.JPG', 'rule=$_2.JPG'],
            'rule=$_57.JPG',
            $imgElement ? $imgElement->getAttribute('src') : ''
        );

        $item['content'] = '<img src="' . $imgUrl . '"/>';

        $textContainer = $element->find('div.aditem-main', 0);
        if ($textContainer) {
            $anchor = $textContainer->find('a', 0);
            if ($anchor) {
                $anchor->href = self::URI . $anchor->href; // add domain to url
            }
            $item['content'] .= $textContainer->outertext;
        }

        $this->items[] = $item;
    }

    /**
     * Resolves the "category" input to a numeric category id by looking for the
     * first entry of the category tree whose name contains the input
     * (case-insensitive). Returns 0 when no category is set or none matches.
     */
    private function findCategoryId()
    {
        $category = $this->getInput('category');
        if (!$category) {
            return 0;
        }

        $needle = strtolower($category);
        $html = getSimpleHTMLDOM(self::URI . '/s-kategorie-baum.html');
        foreach ($html->find('a[data-val]') as $element) {
            if (str_contains(strtolower($element->plaintext), $needle)) {
                return (int)$element->getAttribute('data-val');
            }
        }
        return 0;
    }
}
|
@@ -27,12 +27,15 @@ class KoFiBridge extends BridgeAbstract
|
||||
if (isset($titleWrapper[0])) {
|
||||
$item = [];
|
||||
$item['title'] = $element->find('div.content-link-text div')[0]->plaintext;
|
||||
// $item['timestamp'] = strtotime($element->find('div.feeditem-time', 0)->plaintext);
|
||||
$item['uri'] = self::URI . $element->find('div.fi-post-item-large a')[0]->href;
|
||||
$uri = $element->find('div.content-link-text div')[2]->find('a')[0]->onclick;
|
||||
$uri = trim(str_replace('window.location =', '', $uri));
|
||||
$uri = trim(str_replace(''', '', $uri));
|
||||
$uri = trim(str_replace(';', '', $uri));
|
||||
$item['uri'] = self::URI . $uri;
|
||||
|
||||
if (isset($element->find('div.fi-post-item-large div.content-link-post img')[0])) {
|
||||
$item['enclosures'][] = $element->find('div.fi-post-item-large div.content-link-post img')[0]->src;
|
||||
}
|
||||
// $item['content'] = $element->find('div.content-link-text div#content-link', 0)->plaintext;
|
||||
|
||||
$html = getSimpleHTMLDOM($item['uri']);
|
||||
$feedItemTime = $html->find('div.feeditem-time', 0);
|
||||
|
@@ -7,10 +7,8 @@ class KoreusBridge extends FeedExpander
|
||||
const URI = 'https://www.koreus.com/';
|
||||
const DESCRIPTION = 'Returns the newest posts from Koreus (full text)';
|
||||
|
||||
protected function parseItem($item)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($item);
|
||||
|
||||
$html = getSimpleHTMLDOMCached($item['uri']);
|
||||
$text = $html->find('p.itemText', 0)->innertext;
|
||||
$item['content'] = utf8_encode($text);
|
||||
|
@@ -12,9 +12,8 @@ class LeMondeInformatiqueBridge extends FeedExpander
|
||||
$this->collectExpandableDatas(self::URI . 'rss/rss.xml', 10);
|
||||
}
|
||||
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($newsItem);
|
||||
$article_html = getSimpleHTMLDOMCached($item['uri']);
|
||||
|
||||
//Deduce thumbnail URL from article image URL
|
||||
|
@@ -13,12 +13,10 @@ class ListverseBridge extends FeedExpander
|
||||
$this->collectExpandableDatas('https://listverse.com/feed/', 15);
|
||||
}
|
||||
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($newsItem);
|
||||
// $articlePage gets the entire page's contents
|
||||
$articlePage = getSimpleHTMLDOM($newsItem->link);
|
||||
$article = $articlePage->find('#articlecontentonly', 0);
|
||||
$dom = getSimpleHTMLDOM($item['uri']);
|
||||
$article = $dom->find('#articlecontentonly', 0);
|
||||
$item['content'] = $article;
|
||||
return $item;
|
||||
}
|
||||
|
26
bridges/LogicMastersBridge.php
Normal file
26
bridges/LogicMastersBridge.php
Normal file
@@ -0,0 +1,26 @@
|
||||
<?php
|
||||
|
||||
/**
 * News ("Aktuelles") entries from the Logic Masters Deutschland e.V. start page,
 * extracted purely via the XPath expressions below.
 */
class LogicMastersBridge extends XPathAbstract
{
    const NAME = 'Logic Masters Deutschland e.V.';
    const URI = 'https://logic-masters.de/';
    const DESCRIPTION = 'Aktuelles';
    const MAINTAINER = 'hleskien';

    const FEED_SOURCE_URL = 'https://logic-masters.de/';
    //const XPATH_EXPRESSION_FEED_ICON = './/link[@rel="SHORTCUT ICON"]/@href';
    const XPATH_EXPRESSION_ITEM = '//div[@class="aktuelles_eintrag"]';
    const XPATH_EXPRESSION_ITEM_TITLE = './div[@class="aktuelles_titel"]';
    const XPATH_EXPRESSION_ITEM_CONTENT = './p';
    //const XPATH_EXPRESSION_ITEM_URI = './a/@href';
    //const XPATH_EXPRESSION_ITEM_AUTHOR = './/';
    const XPATH_EXPRESSION_ITEM_TIMESTAMP = './div[@class="aktuelles_datum"]';
    //const XPATH_EXPRESSION_ITEM_ENCLOSURES = './';
    //const XPATH_EXPRESSION_ITEM_CATEGORIES = './/';

    /**
     * Parses the date text of an entry using the German locale in the
     * long date style (no time part) and returns the formatter's parse result.
     */
    protected function formatItemTimestamp($value)
    {
        return (new IntlDateFormatter('de', IntlDateFormatter::LONG, IntlDateFormatter::NONE))->parse($value);
    }
}
|
41
bridges/LuftfahrtBundesAmtBridge.php
Normal file
41
bridges/LuftfahrtBundesAmtBridge.php
Normal file
@@ -0,0 +1,41 @@
|
||||
<?php
|
||||
|
||||
/**
 * News list of the Luftfahrt-Bundesamt (lba.de), extracted from the news table
 * via the XPath expressions below.
 */
class LuftfahrtBundesAmtBridge extends XPathAbstract
{
    const NAME = 'Luftfahrt-Bundesamt';
    const URI = 'https://www.lba.de/DE/Home/Nachrichten/nachrichten_node.html';
    const DESCRIPTION = 'alle Nachrichten: Liste aller Meldungen';
    const MAINTAINER = 'hleskien';

    const FEED_SOURCE_URL = 'https://www.lba.de/DE/Home/Nachrichten/nachrichten_node.html';
    const XPATH_EXPRESSION_FEED_ICON = './/link[@rel="shortcut icon"]/@href';
    const XPATH_EXPRESSION_ITEM = '//table/tbody/tr';
    const XPATH_EXPRESSION_ITEM_TITLE = './td[2]/a/text()';
    const XPATH_EXPRESSION_ITEM_CONTENT = './td[2]/a/text()';
    const XPATH_EXPRESSION_ITEM_URI = './td[2]/a/@href';
    //const XPATH_EXPRESSION_ITEM_AUTHOR = './/';
    const XPATH_EXPRESSION_ITEM_TIMESTAMP = './td[1]';
    //const XPATH_EXPRESSION_ITEM_ENCLOSURES = './';
    //const XPATH_EXPRESSION_ITEM_CATEGORIES = './/';

    /**
     * Appends the fixed query string to the icon URL found by the parent.
     */
    protected function provideFeedIcon(\DOMXPath $xpath)
    {
        return parent::provideFeedIcon($xpath) . '?__blob=normal&v=3';
    }

    /**
     * Converts a "dd.mm.yyyy" date cell into a Unix timestamp at midnight.
     *
     * Returns null when the cell does not contain a date in that format
     * (createFromFormat() returns false in that case and must not be
     * dereferenced).
     * NOTE(review): null is assumed to be treated as "no timestamp" by the
     * caller - confirm against XPathAbstract / FeedItem.
     */
    protected function formatItemTimestamp($value)
    {
        $value = trim($value);
        $dti = DateTimeImmutable::createFromFormat('d.m.Y', $value);
        if ($dti === false) {
            return null;
        }
        // createFromFormat() fills unparsed fields with the current time
        $dti = $dti->setTime(0, 0, 0);
        return $dti->getTimestamp();
    }

    // remove jsession part
    protected function formatItemUri($value)
    {
        $parts = explode(';', $value);
        return $parts[0];
    }
}
|
||||
|
26
bridges/LuftsportSHBridge.php
Normal file
26
bridges/LuftsportSHBridge.php
Normal file
@@ -0,0 +1,26 @@
|
||||
<?php
|
||||
|
||||
/**
 * News list of the Luftsportverband Schleswig-Holstein e.V., extracted from
 * the start page via the XPath expressions below.
 */
class LuftsportSHBridge extends XPathAbstract
{
    const NAME = 'Luftsportverband Schleswig-Holstein';
    const URI = 'https://www.luftsport-sh.de/start.html';
    const DESCRIPTION = 'Aktuelles vom Luftsportverband Schleswig-Holstein e.V.';
    const MAINTAINER = 'hleskien';

    const FEED_SOURCE_URL = 'https://www.luftsport-sh.de/start.html';
    const XPATH_EXPRESSION_FEED_ICON = './/link[@rel="icon" and @sizes="16x16"]/@href';
    const XPATH_EXPRESSION_ITEM = '//div[contains(@class, "mod_newslist")]/div';
    const XPATH_EXPRESSION_ITEM_TITLE = './/*[@itemprop="name"]/a/text()';
    const XPATH_EXPRESSION_ITEM_CONTENT = './/div[@itemprop="description"]/p/text()';
    const XPATH_EXPRESSION_ITEM_URI = './h3/a/@href';
    //const XPATH_EXPRESSION_ITEM_AUTHOR = './/';
    const XPATH_EXPRESSION_ITEM_TIMESTAMP = './/time/@datetime';
    const XPATH_EXPRESSION_ITEM_ENCLOSURES = './/img/@src';
    //const XPATH_EXPRESSION_ITEM_CATEGORIES = './/';

    /**
     * Converts the ATOM-formatted "datetime" attribute into a Unix timestamp.
     *
     * Returns null when the attribute is not a valid ATOM date
     * (createFromFormat() returns false in that case and must not be
     * dereferenced).
     * NOTE(review): null is assumed to be treated as "no timestamp" by the
     * caller - confirm against XPathAbstract / FeedItem.
     */
    protected function formatItemTimestamp($value)
    {
        $dti = DateTimeImmutable::createFromFormat(DateTimeInterface::ATOM, trim($value));
        if ($dti === false) {
            return null;
        }
        return $dti->getTimestamp();
    }
}
|
44
bridges/MangaReaderBridge.php
Normal file
44
bridges/MangaReaderBridge.php
Normal file
@@ -0,0 +1,44 @@
|
||||
<?php
|
||||
|
||||
/**
 * Lists the chapters of one manga on mangareader.to for a given language.
 */
class MangaReaderBridge extends BridgeAbstract
{
    const NAME = 'MangaReader Bridge';
    const URI = 'https://mangareader.to';
    const DESCRIPTION = 'Fetches the latest chapters from MangaReader.to.';
    const MAINTAINER = 'cubethethird';
    const PARAMETERS = [
        [
            'url' => [
                'name' => 'Manga URL',
                'type' => 'text',
                'required' => true,
                'title' => 'The URL of the manga on MangaReader',
                'pattern' => '^https:\/\/mangareader\.to\/[^\/]+$',
                'exampleValue' => 'https://mangareader.to/bleach-1623',
            ],
            'lang' => [
                'name' => 'Chapter Language',
                'title' => 'two-letter language code (example "en", "jp", "fr")',
                'exampleValue' => 'en',
                'required' => true,
                'pattern' => '^[a-z][a-z]$',
            ]
        ]
    ];

    /**
     * Reads the "<lang>-chapters" list of the manga page and adds one item
     * (title + absolute uri) per chapter link.
     *
     * @throws \Exception when the page has no chapter list for the requested language
     */
    public function collectData()
    {
        $url = $this->getInput('url');
        $lang = $this->getInput('lang');
        $dom = getSimpleHTMLDOM($url);

        // The list element only exists for languages the manga is available in
        $chapters = $dom->getElementById($lang . '-chapters');
        if (!$chapters) {
            throw new \Exception(sprintf('No chapter list found for language "%s" on %s', $lang, $url));
        }

        foreach ($chapters->getElementsByTagName('li') as $chapter) {
            $a = $chapter->getElementsByTagName('a')[0] ?? null;
            if (!$a) {
                // list entry without a link: nothing to point the item at
                continue;
            }
            $item = [];
            $item['title'] = $a->getAttribute('title');
            $item['uri'] = self::URI . $a->getAttribute('href');
            $this->items[] = $item;
        }
    }
}
|
48
bridges/ManyVidsBridge.php
Normal file
48
bridges/ManyVidsBridge.php
Normal file
@@ -0,0 +1,48 @@
|
||||
<?php
|
||||
|
||||
/**
 * Bridge that lists the store videos of a ManyVids profile.
 *
 * The profile may be given either as "id/name" or as a full profile URL.
 * The video list is read from the JSON stored in the "data-store-videos"
 * attribute of the store page.
 */
class ManyVidsBridge extends BridgeAbstract
{
    const NAME = 'MANYVIDS';
    const URI = 'https://www.manyvids.com';
    const DESCRIPTION = 'Fetches the latest posts from a profile';
    const MAINTAINER = 'dvikan';
    const CACHE_TIMEOUT = 60 * 60;
    const PARAMETERS = [
        [
            'profile' => [
                'name' => 'Profile',
                'type' => 'text',
                'required' => true,
                'exampleValue' => '678459/Aziani-Studios',
                'title' => 'id/profile or url',
            ],
        ]
    ];

    /**
     * Resolve the profile input, fetch its store page and add one item per video.
     *
     * @throws \Exception when the profile input is not understood or the store
     *                    page does not contain the expected video data
     */
    public function collectData()
    {
        $profile = $this->getInput('profile');
        if (preg_match('#^(\d+/.*)$#', $profile, $m)) {
            $profile = $m[1];
        } elseif (preg_match('#https://www\.manyvids\.com/Profile/(\d+/[\w-]+)#', $profile, $m)) {
            // [\w-]+ rather than \w+: profile names may contain hyphens
            // (see the example value), which \w+ would silently truncate.
            $profile = $m[1];
        } else {
            throw new \Exception(sprintf('Unable to extract a profile (expected "id/name" or a profile url) from "%s"', $profile));
        }

        $url = sprintf('https://www.manyvids.com/Profile/%s/Store/Videos/', $profile);
        $dom = getSimpleHTMLDOM($url);
        $el = $dom->find('section[id="app-store-videos"]', 0);
        if (!$el) {
            // Guard against a changed page layout or an unknown profile.
            throw new \Exception(sprintf('Unable to find the video list at %s', $url));
        }

        // The attribute holds HTML-escaped JSON.
        $json = $el->getAttribute('data-store-videos');
        $json = html_entity_decode($json);
        $data = Json::decode($json, false);
        foreach ($data->content->items as $item) {
            $this->items[] = [
                'title' => $item->title,
                'uri' => 'https://www.manyvids.com' . $item->preview->path,
                'uid' => 'manyvids/' . $item->id,
                'content' => sprintf('<img src="%s">', $item->videoThumb),
            ];
        }
    }
}
|
@@ -14,6 +14,51 @@ class MarktplaatsBridge extends BridgeAbstract
|
||||
'required' => true,
|
||||
'title' => 'The search string for marktplaats',
|
||||
],
|
||||
'c' => [
|
||||
'name' => 'Category',
|
||||
'type' => 'list',
|
||||
'values' => [
|
||||
'Select a category' => '',
|
||||
'Antiek en Kunst' => '1',
|
||||
'Audio, Tv en Foto' => '31',
|
||||
'Auto's' => '91',
|
||||
'Auto-onderdelen' => '2600',
|
||||
'Auto diversen' => '48',
|
||||
'Boeken' => '201',
|
||||
'Caravans en Kamperen' => '289',
|
||||
'Cd's en Dvd's' => '1744',
|
||||
'Computers en Software' => '322',
|
||||
'Contacten en Berichten' => '378',
|
||||
'Diensten en Vakmensen' => '1098',
|
||||
'Dieren en Toebehoren' => '395',
|
||||
'Doe-het-zelf en Verbouw' => '239',
|
||||
'Fietsen en Brommers' => '445',
|
||||
'Hobby en Vrije tijd' => '1099',
|
||||
'Huis en Inrichting' => '504',
|
||||
'Huizen en Kamers' => '1032',
|
||||
'Kinderen en Baby's' => '565',
|
||||
'Kleding | Dames' => '621',
|
||||
'Kleding | Heren' => '1776',
|
||||
'Motoren' => '678',
|
||||
'Muziek en Instrumenten' => '728',
|
||||
'Postzegels en Munten' => '1784',
|
||||
'Sieraden, Tassen en Uiterlijk' => '1826',
|
||||
'Spelcomputers en Games' => '356',
|
||||
'Sport en Fitness' => '784',
|
||||
'Telecommunicatie' => '820',
|
||||
'Tickets en Kaartjes' => '1984',
|
||||
'Tuin en Terras' => '1847',
|
||||
'Vacatures' => '167',
|
||||
'Vakantie' => '856',
|
||||
'Verzamelen' => '895',
|
||||
'Watersport en Boten' => '976',
|
||||
'Witgoed en Apparatuur' => '537',
|
||||
'Zakelijke goederen' => '1085',
|
||||
'Diversen' => '428',
|
||||
],
|
||||
'required' => false,
|
||||
'title' => 'The category to search in',
|
||||
],
|
||||
'z' => [
|
||||
'name' => 'zipcode',
|
||||
'type' => 'text',
|
||||
@@ -57,7 +102,15 @@ class MarktplaatsBridge extends BridgeAbstract
|
||||
'type' => 'checkbox',
|
||||
'required' => false,
|
||||
'title' => 'Include the raw data behind the content',
|
||||
]
|
||||
],
|
||||
'sc' => [
|
||||
'name' => 'Sub category',
|
||||
'type' => 'number',
|
||||
'required' => false,
|
||||
'exampleValue' => '12345',
|
||||
'title' => 'Sub category has to be given by id as the list is too big to show here.
|
||||
Only use subcategories that belong to the main category. Both have to be correct',
|
||||
],
|
||||
]
|
||||
];
|
||||
const CACHE_TIMEOUT = 900;
|
||||
@@ -80,6 +133,12 @@ class MarktplaatsBridge extends BridgeAbstract
|
||||
$excludeGlobal = true;
|
||||
}
|
||||
}
|
||||
if (!empty($this->getInput('c'))) {
|
||||
$query .= '&l1CategoryId=' . $this->getInput('c');
|
||||
}
|
||||
if (!is_null($this->getInput('sc'))) {
|
||||
$query .= '&l2CategoryId=' . $this->getInput('sc');
|
||||
}
|
||||
$url = 'https://www.marktplaats.nl/lrp/api/search?query=' . urlencode($this->getInput('q')) . $query;
|
||||
$jsonString = getSimpleHTMLDOM($url);
|
||||
$jsonObj = json_decode($jsonString);
|
||||
@@ -97,15 +156,15 @@ class MarktplaatsBridge extends BridgeAbstract
|
||||
$item['enclosures'] = $listing->imageUrls;
|
||||
if (is_array($listing->imageUrls)) {
|
||||
foreach ($listing->imageUrls as $imgurl) {
|
||||
$item['content'] .= "<br />\n<img src='https:" . $imgurl . "' />";
|
||||
$item['content'] .= "<br />\n<img alt='' src='https:" . $imgurl . "' />";
|
||||
}
|
||||
} else {
|
||||
$item['content'] .= "<br>\n<img src='https:" . $listing->imageUrls . "' />";
|
||||
$item['content'] .= "<br>\n<img alt='' src='https:" . $listing->imageUrls . "' />";
|
||||
}
|
||||
}
|
||||
if (!is_null($this->getInput('r'))) {
|
||||
if ($this->getInput('r')) {
|
||||
$item['content'] .= "<br />\n<br />\n<br />\n" . json_encode($listing);
|
||||
$item['content'] .= "<br />\n<br />\n<br />\n" . json_encode($listing) . "<br />$url";
|
||||
}
|
||||
}
|
||||
$item['content'] .= "<br>\n<br>\nPrice: " . $listing->priceInfo->priceCents / 100;
|
||||
@@ -130,4 +189,80 @@ class MarktplaatsBridge extends BridgeAbstract
|
||||
}
|
||||
return parent::getName();
|
||||
}
|
||||
|
||||
/**
 * Method can be used to scrape the subcategories from marktplaats
 *
 * Reads the main categories from the category <select> on the front page,
 * then queries the search API once per main category to collect its sub
 * categories.
 *
 * @return array [
 *     'main' => [category label => category id],
 *     'sub'  => [main category full name => [sub category full name => id]],
 * ]
 * @throws \Exception when the front page cannot be loaded
 */
private static function scrapeSubCategories()
{
    $main = [];
    $main['Select a category'] = '';
    // Initialised up front: if no <option> qualifies, the loop over $ids
    // below would otherwise run on an undefined variable.
    $ids = [];

    $marktplaatsHTML = file_get_html('https://www.marktplaats.nl');
    if (!$marktplaatsHTML) {
        throw new \Exception('Unable to load https://www.marktplaats.nl');
    }

    foreach ($marktplaatsHTML->find('select[id=categoryId] option') as $opt) {
        // Skip options whose label contains "categorie".
        if (!str_contains($opt->innertext, 'categorie')) {
            $main[$opt->innertext] = $opt->value;
            $ids[] = $opt->value;
        }
    }

    $result = [];
    foreach ($ids as $id) {
        $url = 'https://www.marktplaats.nl/lrp/api/search?l1CategoryId=' . $id;
        $jsonstring = getContents($url);
        $jsondata = json_decode((string)$jsonstring);

        if (!isset($jsondata->searchCategoryOptions)) {
            // Unexpected response: dump it so the developer can inspect it.
            print($jsonstring);
            continue;
        }

        $categories = $jsondata->searchCategoryOptions;
        if (isset($jsondata->categoriesById->$id)) {
            $maincategory = $jsondata->categoriesById->$id;
            $array = [];
            foreach ($categories as $categorie) {
                $array[$categorie->fullName] = $categorie->id;
            }
            $result[$maincategory->fullName] = $array;
        }
    }

    $combinedResult = [
        'main' => $main,
        'sub' => $result
    ];
    return $combinedResult;
}
|
||||
|
||||
/**
 * Helper method to construct the array that could be used for categories
 *
 * Echoes the given array as PHP array source code, one entry per line.
 * Nested arrays are printed recursively with one more level of indentation.
 * Keys and values are emitted as single-quoted string literals.
 *
 * @param array $array  Array to print; values are scalars or nested arrays
 * @param int   $indent Indentation passed to str_repeat() for every line
 * @return void
 */
private static function printArrayAsCode($array, $indent = 0)
{
    $padding = str_repeat(' ', $indent);

    foreach ($array as $key => $value) {
        // Escape backslashes and single quotes so the emitted single-quoted
        // literal stays valid PHP. This has to happen for keys of nested
        // arrays too (e.g. a category name containing an apostrophe).
        $key = addcslashes((string) $key, "'\\");

        if (is_array($value)) {
            echo $padding . "'$key' => [" . PHP_EOL;
            self::printArrayAsCode($value, $indent + 1);
            echo $padding . '],' . PHP_EOL;
        } else {
            $value = addcslashes((string) $value, "'\\");
            echo $padding . "'$key' => '$value'," . PHP_EOL;
        }
    }
}
|
||||
|
||||
/**
 * Scrape the categories and echo both lists as PHP array source code.
 *
 * The main categories are printed first and the sub categories second,
 * each as an assignment to $myArray.
 *
 * @return void
 */
private static function printScrapeArray()
{
    $scraped = self::scrapeSubCategories();

    foreach (['main', 'sub'] as $section) {
        echo '$myArray = [' . PHP_EOL;
        self::printArrayAsCode($scraped[$section], 1);
        echo '];' . PHP_EOL;
    }
}
|
||||
}
|
||||
|
@@ -82,14 +82,14 @@ class MastodonBridge extends BridgeAbstract
|
||||
}
|
||||
$items = $content['orderedItems'] ?? $content['items'];
|
||||
foreach ($items as $status) {
|
||||
$item = $this->parseItem($status);
|
||||
$item = $this->parseStatus($status);
|
||||
if ($item) {
|
||||
$this->items[] = $item;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected function parseItem($content)
|
||||
protected function parseStatus($content)
|
||||
{
|
||||
$item = [];
|
||||
switch ($content['type']) {
|
||||
@@ -161,8 +161,8 @@ class MastodonBridge extends BridgeAbstract
|
||||
$object = $this->fetchAP($object);
|
||||
}
|
||||
|
||||
$item['content'] = $object['content'];
|
||||
$strippedContent = strip_tags(str_replace('<br>', ' ', $object['content']));
|
||||
$item['content'] = $object['content'] ?? '';
|
||||
$strippedContent = strip_tags(str_replace('<br>', ' ', $item['content']));
|
||||
|
||||
if (isset($object['name'])) {
|
||||
$item['title'] = $object['name'];
|
||||
@@ -186,13 +186,14 @@ class MastodonBridge extends BridgeAbstract
|
||||
|
||||
foreach ($object['attachment'] as $attachment) {
|
||||
// Only process REMOTE pictures (prevent xss)
|
||||
$mediaType = $attachment['mediaType'] ?? null;
|
||||
if (
|
||||
$attachment['mediaType']
|
||||
&& preg_match('/^image\//', $attachment['mediaType'], $match)
|
||||
$mediaType
|
||||
&& preg_match('/^image\//', $mediaType, $match)
|
||||
&& preg_match('/^http(s|):\/\//', $attachment['url'], $match)
|
||||
) {
|
||||
$item['content'] = $item['content'] . '<br /><img ';
|
||||
if ($attachment['name']) {
|
||||
if (isset($attachment['name'])) {
|
||||
$item['content'] .= sprintf('alt="%s" ', $attachment['name']);
|
||||
}
|
||||
$item['content'] .= sprintf('src="%s" />', $attachment['url']);
|
||||
|
@@ -29,9 +29,9 @@ class MediapartBridge extends FeedExpander
|
||||
$this->collectExpandableDatas($url);
|
||||
}
|
||||
|
||||
protected function parseItem($newsItem)
|
||||
protected function parseItem(array $item)
|
||||
{
|
||||
$item = parent::parseItem($newsItem);
|
||||
$itemUrl = $item['uri'];
|
||||
|
||||
// Mediapart provide multiple type of contents.
|
||||
// We only process items relative to the newspaper
|
||||
@@ -49,12 +49,8 @@ class MediapartBridge extends FeedExpander
|
||||
$opt = [];
|
||||
$opt[CURLOPT_COOKIE] = 'MPSESSID=' . $mpsessid;
|
||||
|
||||
// Get the page
|
||||
$articlePage = getSimpleHTMLDOM(
|
||||
$newsItem->link . '?onglet=full',
|
||||
[],
|
||||
$opt
|
||||
);
|
||||
$pageUrl = $itemUrl . '?onglet=full';
|
||||
$articlePage = getSimpleHTMLDOM($pageUrl, [], $opt);
|
||||
|
||||
// Extract the article content
|
||||
$content = $articlePage->find('div.content-article', 0)->innertext;
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user