2018-03-11 15:38:07 +01:00
|
|
|
<?php
|
|
|
|
/**
 * Bridge that turns the news section of Radio Melodie into feed items.
 *
 * collectData() downloads the news index, follows every link found in the
 * article list, and builds one item per article page (title, author,
 * publication date, picture/audio enclosures and cleaned-up HTML content).
 */
class RadioMelodieBridge extends BridgeAbstract {

    const NAME = 'Radio Melodie Actu';
    const URI = 'https://www.radiomelodie.com';
    const DESCRIPTION = 'Retourne les actualités publiées par Radio Melodie';
    const MAINTAINER = 'sysadminstory';

    /**
     * Return the URL of the icon representing this bridge.
     *
     * @return string Absolute URL of the site favicon.
     */
    public function getIcon() {
        return self::URI . '/img/favicon.png';
    }

    /**
     * Fetch the news index and append one item per linked article to $this->items.
     *
     * Articles whose page cannot be loaded, or that contain no <article>
     * element, are skipped so that a single broken page does not abort the
     * whole feed.
     */
    public function collectData(){
        $html = getSimpleHTMLDOM(self::URI . '/actu/')
            or returnServerError('Could not request Radio Melodie.');

        $listContainer = $html->find('div[class=displayList]', 0);
        if(!$listContainer) {
            returnServerError('Could not find the article list on Radio Melodie.');
            // NOTE(review): returnServerError() is expected to abort; the
            // return only guards against a null dereference if it does not.
            return;
        }

        foreach($listContainer->children() as $element) {
            // Only the direct <a> children of the list point to articles
            if($element->tag != 'a') {
                continue;
            }

            $item = $this->buildItem(self::URI . $element->href);
            if($item !== null) {
                $this->items[] = $item;
            }
        }
    }

    /**
     * Download one article page and convert it into a feed item.
     *
     * @param string $articleURL Absolute URL of the article page.
     * @return array|null The item array, or null when the page could not be
     *                    loaded or does not contain an <article> element.
     */
    private function buildItem($articleURL)
    {
        $article = getSimpleHTMLDOM($articleURL);
        if(!$article) {
            return null;
        }

        // Replace the javascript players by <audio> tags, then re-parse the
        // serialised document so the new <audio> elements can be queried
        $this->rewriteAudioPlayers($article);
        $article = str_get_html($article->save());
        if(!$article) {
            return null;
        }

        $textDOM = $article->find('article', 0);
        if(!$textDOM) {
            return null;
        }

        // Main picture: used both as first enclosure and as content header
        $enclosures = array();
        $header = '';
        $pictureBox = $article->find('div[id=pictureTitleSupport]', 0);
        $pictureTag = $pictureBox ? $pictureBox->find('img', 0) : null;
        if($pictureTag && $pictureTag->src) {
            $pictureURL = self::URI . $pictureTag->src;
            $enclosures[] = $pictureURL;
            $header = '<img src="' . $pictureURL . '"/>';
        }

        // Every audio file of the page is added to the enclosures
        foreach($article->find('audio') as $audioElement) {
            if($audioElement->src) {
                $enclosures[] = $audioElement->src;
            }
        }

        // Point resized pictures at the original image file
        foreach($textDOM->find('img[src^="http://www.radiomelodie.com/image.php"]') as $img) {
            $img->src = $this->rewriteImage($img->src);
        }

        // Remove Google Ads
        foreach($article->find('div[class=adInline]') as $ad) {
            $ad->outertext = '';
        }

        // Remove Radio Melodie Logo
        $this->blankNode($article->find('div[id=logoArticleRM]', 0));

        $authorTag = $article->find('p[class=AuthorName]', 0);
        $dateTag = $article->find('p[class=date]', 0);
        $titleTag = $article->find('meta[property=og:title]', 0);

        // The raw date line is kept as-is because it is shown in the content
        $dateHTML = $dateTag ? $dateTag->plaintext : '';
        $timestamp = $this->extractTimestamp($dateHTML);

        $item = array();
        $item['enclosures'] = $enclosures;
        if($authorTag) {
            $item['author'] = $authorTag->plaintext;
        }
        $item['uri'] = $articleURL;
        $item['title'] = $titleTag ? $titleTag->content : '';
        if($timestamp !== false) {
            $item['timestamp'] = $timestamp;
        }

        // Remove the Date and Author part
        $this->blankNode($textDOM->find('div[class=AuthorDate]', 0));

        // Remove Facebook javascript
        $this->blankNode($textDOM->find('script[src^=https://connect.facebook.net]', 0));

        // Rewrite relative Links
        $textDOM = defaultLinkTo($textDOM, self::URI . '/');

        $item['content'] = '<h1>' . $item['title'] . '</h1>'
            . $dateHTML . '<br/>'
            . $header
            . $textDOM->innertext;

        return $item;
    }

    /**
     * Empty the rendered output of a DOM node, ignoring missing nodes.
     *
     * @param object|null $node Node returned by find(), or null when absent.
     */
    private function blankNode($node)
    {
        if($node) {
            $node->outertext = '';
        }
    }

    /**
     * Extract the publication timestamp from the date line of an article.
     *
     * The date is the text following "| ", up to an optional " - ..." suffix.
     *
     * @param string $dateHTML Plain text of the date paragraph.
     * @return int|false Unix timestamp, or false when no date could be read.
     */
    private function extractTimestamp($dateHTML)
    {
        if(preg_match('/\| ([^-]*)( - .*|)$/', $dateHTML, $matches) !== 1) {
            return false;
        }

        return $this->parseFrenchDate($matches[1]);
    }

    /**
     * Parse a French date such as "mardi 3 août 2021 à 18:30".
     *
     * The month name is looked up in a fixed table, so the result depends
     * neither on the locales installed on the server nor on the default
     * timezone; the date is interpreted in the Europe/Paris timezone.
     *
     * @param string $dateText Date text (leading week day is optional).
     * @return int|false Unix timestamp, or false when the text is not a valid date.
     */
    private function parseFrenchDate($dateText)
    {
        $months = array(
            'janvier' => 1,
            'février' => 2,
            'mars' => 3,
            'avril' => 4,
            'mai' => 5,
            'juin' => 6,
            'juillet' => 7,
            'août' => 8,
            'septembre' => 9,
            'octobre' => 10,
            'novembre' => 11,
            'décembre' => 12,
        );

        $dateText = html_entity_decode($dateText, ENT_QUOTES, 'UTF-8');
        $pattern = '/(\d{1,2})\s+(\p{L}+)\s+(\d{4})\s+à\s+(\d{1,2})[:h](\d{2})/u';
        if(preg_match($pattern, $dateText, $parts) !== 1) {
            return false;
        }

        $monthName = mb_strtolower($parts[2], 'UTF-8');
        if(!isset($months[$monthName])) {
            return false;
        }

        $day = (int)$parts[1];
        $month = $months[$monthName];
        $year = (int)$parts[3];
        $hour = (int)$parts[4];
        $minute = (int)$parts[5];

        if(!checkdate($month, $day, $year) || $hour > 23 || $minute > 59) {
            return false;
        }

        $date = new DateTimeImmutable(
            sprintf('%04d-%02d-%02d %02d:%02d:00', $year, $month, $day, $hour, $minute),
            new DateTimeZone('Europe/Paris')
        );

        return $date->getTimestamp();
    }

    /**
     * Rewrite an image URL to use the real image URL and not the resized one
     * (which is very slow).
     *
     * @param string $url URL of the resizing script, carrying the real image
     *                    path in its "image" query parameter.
     * @return string URL of the original image, or $url unchanged when it has
     *                no usable "image" parameter.
     */
    private function rewriteImage($url)
    {
        $parts = explode('?', $url, 2);
        if(!isset($parts[1])) {
            return $url;
        }

        // The attribute value may still contain "&amp;" between parameters
        parse_str(html_entity_decode($parts[1]), $params);
        if(!isset($params['image']) || !is_string($params['image']) || $params['image'] === '') {
            return $url;
        }

        return self::URI . '/' . ltrim($params['image'], '/');
    }

    /**
     * Rewrite audio players to use the <audio> tag and not the javascript
     * audio player.
     *
     * For each player, the audio URL is read from the wavesurferN.load('...')
     * call found in the element following the player. Players for which no
     * URL can be extracted are left untouched.
     *
     * @param object $html Parsed document; modified in place.
     */
    private function rewriteAudioPlayers($html)
    {
        foreach($html->find('div[class=audioPlayer]') as $audioPlayer) {
            // Get the javascript content below the player
            $js = $audioPlayer->next_sibling();
            if(!$js) {
                continue;
            }

            // Extract the audio file URL
            if(preg_match('/wavesurfer[0-9]+\.load\(\'([^\']*)\'\)/', $js->innertext, $urls) !== 1) {
                continue;
            }

            // Replace the <script> tag by a plain HTML <audio> tag
            $js->outertext = '<audio style="width: 100%" src="' . $urls[1] . '" controls ></audio>';

            // Remove the initial Audio Player
            $audioPlayer->outertext = '';
        }
    }
}
|