mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2025-01-17 22:28:22 +01:00
[ARDMediathekBridge] Switch to JSON-API (#2380)
* Switch ARDMediathekBridge to JSON-API: The HTML screen scraping approach of ARDMediathekBridge did not work reliably. I could not find a single show for which the item list was not empty when using the HTML screen scraping approach. The proposed change uses the JSON-API of the WebApp instead. Although there is still room for improvement (feed title, better understanding of the API, more accurate mimicry of the webapp's behavior, de-pagination …), the bridge does work with this change. The parameter description now indicates that full URLs as well as just the ID are accepted.
This commit is contained in:
parent
368a198321
commit
f259fa7f9f
@ -4,14 +4,48 @@ class ARDMediathekBridge extends BridgeAbstract {
|
||||
const URI = 'https://www.ardmediathek.de';
|
||||
const DESCRIPTION = 'Feed of any series in the ARD-Mediathek, specified by its path';
|
||||
const MAINTAINER = 'yue-dongchen';
|
||||
/*
|
||||
* Number of Items to be requested from ARDmediathek API
|
||||
* 12 has been observed in the wild
|
||||
* 29 is the highest successfully tested value
|
||||
* More Items could be fetched via pagination
|
||||
* The JSON-field pagination holds more information on that
|
||||
* @const PAGESIZE number of requested items
|
||||
*/
|
||||
const PAGESIZE = 29;
|
||||
/*
|
||||
* The URL Prefix of the (Webapp-)API
|
||||
* @const APIENDPOINT https-URL of the used endpoint
|
||||
*/
|
||||
const APIENDPOINT = 'https://api.ardmediathek.de/page-gateway/widgets/ard/asset/';
|
||||
/*
|
||||
* The URL prefix of the video link
|
||||
* URLs from the webapp include a slug containing titles of show, episode, and tv station.
|
||||
* It seems to work without that.
|
||||
* @const VIDEOLINKPREFIX https-URL prefix of video links
|
||||
*/
|
||||
const VIDEOLINKPREFIX = 'https://www.ardmediathek.de/video/';
|
||||
/*
|
||||
* The requested width of the preview image
|
||||
* 432 has been observed in the wild
|
||||
* The webapp seems to also compute and add the height value
|
||||
* It seems to work without that.
|
||||
* @const IMAGEWIDTH width in px of the preview image
|
||||
*/
|
||||
const IMAGEWIDTH = 432;
|
||||
/*
|
||||
* Placeholder that will be replaced by IMAGEWIDTH in the preview image URL
|
||||
* @const IMAGEWIDTHPLACEHOLDER
|
||||
*/
|
||||
const IMAGEWIDTHPLACEHOLDER = '{width}';
|
||||
|
||||
const PARAMETERS = array(
|
||||
array(
|
||||
'path' => array(
|
||||
'name' => 'Path',
|
||||
'name' => 'Show Link or ID',
|
||||
'required' => true,
|
||||
'title' => 'Enter without trailing slash',
|
||||
'defaultValue' => '45-min/Y3JpZDovL25kci5kZS8xMzkx'
|
||||
'title' => 'Link to the show page or just its alphanumeric suffix',
|
||||
'defaultValue' => 'https://www.ardmediathek.de/sendung/45-min/Y3JpZDovL25kci5kZS8xMzkx/'
|
||||
)
|
||||
)
|
||||
);
|
||||
@ -19,17 +53,38 @@ class ARDMediathekBridge extends BridgeAbstract {
|
||||
public function collectData() {
|
||||
date_default_timezone_set('Europe/Berlin');
|
||||
|
||||
$url = 'https://www.ardmediathek.de/sendung/' . $this->getInput('path') . '/';
|
||||
$html = getSimpleHTMLDOM($url);
|
||||
$html = defaultLinkTo($html, $url);
|
||||
$pathComponents = explode('/', $this->getInput('path'));
|
||||
if (empty($pathComponents)) {
|
||||
returnClientError('Path may not be empty');
|
||||
}
|
||||
if (count($pathComponents) < 2) {
|
||||
$showID = $pathComponents[0];
|
||||
} else {
|
||||
$lastKey = count($pathComponents) - 1;
|
||||
$showID = $pathComponents[$lastKey];
|
||||
if (strlen($showID) === 0) {
|
||||
$showID = $pathComponents[$lastKey - 1];
|
||||
}
|
||||
}
|
||||
|
||||
foreach($html->find('a.Root-sc-1ytw7qu-0') as $video) {
|
||||
$url = SELF::APIENDPOINT . $showID . '/?pageSize=' . SELF::PAGESIZE;
|
||||
$rawJSON = getContents($url);
|
||||
$processedJSON = json_decode($rawJSON);
|
||||
|
||||
foreach($processedJSON->teasers as $video) {
|
||||
$item = array();
|
||||
$item['uri'] = $video->href;
|
||||
$item['title'] = $video->find('h3', 0)->plaintext;
|
||||
$item['content'] = '<img src="' . $video->find('img', 0)->src . '" />';
|
||||
$item['timestamp'] = strtotime(mb_substr($video->find('div.Line-epbftj-1', 0)->plaintext, 0, 10));
|
||||
|
||||
// there is also ->links->self->id, ->links->self->urlId, ->links->target->id, ->links->target->urlId
|
||||
$item['uri'] = SELF::VIDEOLINKPREFIX . $video->id . '/';
|
||||
// there is also ->mediumTitle and ->shortTitle
|
||||
$item['title'] = $video->longTitle;
|
||||
// in the test, aspect16x9 was the only child of images, not sure whether that is always true
|
||||
$item['enclosures'] = array(
|
||||
str_replace(SELF::IMAGEWIDTHPLACEHOLDER, SELF::IMAGEWIDTH, $video->images->aspect16x9->src)
|
||||
);
|
||||
$item['content'] = '<img src="' . $item['enclosures'][0] . '" /><p>';
|
||||
$item['timestamp'] = $video->broadcastedOn;
|
||||
$item['uid'] = $video->id;
|
||||
$item['author'] = $video->publicationService->name;
|
||||
$this->items[] = $item;
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user