2014-05-26 19:45:10 +02:00
|
|
|
<?php
|
2017-02-11 16:16:56 +01:00
|
|
|
/**
 * Bridge that lists the latest audio and video entries published by the
 * Collège de France website.
 */
class CollegeDeFranceBridge extends BridgeAbstract {

    const MAINTAINER = 'pit-fgfjiudghdf';
    const NAME = 'CollegeDeFrance';
    const URI = 'http://www.college-de-france.fr/';
    const CACHE_TIMEOUT = 10800; // 3h
    const DESCRIPTION = 'Returns the latest audio and video from CollegeDeFrance';

    /**
     * Abbreviated French month names, as they appear in the ".date" span,
     * mapped to their two-digit month number.
     */
    private static $monthNumbers = array(
        'janv.' => '01',
        'févr.' => '02',
        'mars' => '03',
        'avr.' => '04',
        'mai' => '05',
        'juin' => '06',
        'juil.' => '07',
        'août' => '08',
        'sept.' => '09',
        'oct.' => '10',
        'nov.' => '11',
        'déc.' => '12'
    );

    /**
     * Fetches the audio/video listing and appends one item per entry to
     * $this->items.
     *
     * Each item carries 'title', 'content' and 'uri'; 'timestamp' is set only
     * when a date could be parsed from the link or from the displayed date.
     */
    public function collectData(){
        // The "API" used by the site returns a list of partial HTML in this form
        /* <li>
         *   <a href="/site/thomas-romer/guestlecturer-2016-04-15-14h30.htm" data-target="after">
         *     <span class="date"><span class="list-icon list-icon-video"></span>
         *     <span class="list-icon list-icon-audio"></span>15 avr. 2016</span>
         *     <span class="lecturer">Christopher Hays</span>
         *     <span class='title'>Imagery of Divine Suckling in the Hebrew Bible and the Ancient Near East</span>
         *   </a>
         * </li>
         */
        $html = getSimpleHTMLDOM(self::URI
            . 'components/search-audiovideo.jsp?fulltext=&siteid=1156951719600&lang=FR&type=all')
            or returnServerError('Could not request CollegeDeFrance.');

        // Loop-invariant: build the timezone once instead of once per entry.
        $timezone = new DateTimeZone('Europe/Paris');

        foreach($html->find('a[data-target]') as $element) {
            $titleNode = $element->find('.title', 0);
            if(!$titleNode) {
                // Without a title there is nothing meaningful to publish.
                continue;
            }

            $lecturerNode = $element->find('.lecturer', 0);
            $dateNode = $element->find('.date', 0);

            $item = array();
            $item['title'] = $titleNode->plaintext;

            $date = $this->parseDate(
                $element->href,
                $dateNode ? $dateNode->plaintext : '',
                $timezone
            );

            // Leave the timestamp unset rather than crash the whole feed
            // when neither date source could be parsed.
            if($date) {
                $item['timestamp'] = $date->format('U');
            }

            $item['content'] = $lecturerNode
                ? $lecturerNode->innertext . ' - ' . $titleNode->innertext
                : $titleNode->innertext;

            // URI ends with a slash and hrefs start with one; join them with
            // exactly one separator to avoid "//site/..." links.
            $item['uri'] = rtrim(self::URI, '/') . '/' . ltrim($element->href, '/');

            $this->items[] = $item;
        }
    }

    /**
     * Determines the publication date of an entry.
     *
     * Most relative URLs contain an hour in addition to the date, so that is
     * preferred:
     *   "/site/yann-lecun/course-2016-04-08-11h00.htm"
     * Sometimes there is an "__1" suffix (perhaps it signifies an update):
     *   "/site/patrick-boucheron/seminar-2016-05-03-18h00__1.htm"
     * Some URLs carry no date at all:
     *   "/site/institut-physique/The-Mysteries-of-Decoherence-Sebastien-Gleyzes-[Video-3-35].htm"
     * in which case the displayed date (e.g. "15 avr. 2016") is used instead.
     *
     * @param string $href Link target of the entry
     * @param string $dateText Plain text of the entry's ".date" element
     * @param DateTimeZone $timezone Timezone the parsed date is interpreted in
     * @return DateTime|false The parsed date, or false if none could be parsed
     */
    private function parseDate($href, $dateText, $timezone){
        // Match the full "YYYY-MM-DD-HHhMM" pattern instead of searching for
        // the literal "201", which stops working for years from 2020 onwards.
        if(preg_match('/\d{4}-\d{2}-\d{2}-\d{2}h\d{2}/', (string)$href, $matches)) {
            $date = DateTime::createFromFormat('!Y-m-d-H\hi', $matches[0], $timezone);
            if($date) {
                return $date;
            }
        }

        // Fall back to the displayed date: "15 avr. 2016" -> "15 04 2016".
        $numericDate = trim(strtr((string)$dateText, self::$monthNumbers));

        return DateTime::createFromFormat('!d m Y', $numericDate, $timezone);
    }
}
|