1
0
mirror of https://github.com/RSS-Bridge/rss-bridge.git synced 2025-08-11 11:04:36 +02:00

bridges: use BridgeAbstract::getSimpleHTMLDOM

instead of BridgeAbstract::file_get_html

Signed-off-by: Pierre Mazière <pierre.maziere@gmx.com>
This commit is contained in:
Pierre Mazière
2016-07-08 19:06:35 +02:00
parent f43bbda83e
commit 3c0d13c1bb
121 changed files with 1212 additions and 396 deletions

View File

@@ -12,49 +12,49 @@ class CourrierInternationalBridge extends BridgeAbstract{
}
public function collectData(array $param){
$html = '';
$html = $this->file_get_html('http://www.courrierinternational.com/') or $this->returnServerError('Error.');
$html = $this->getSimpleHTMLDOM('http://www.courrierinternational.com/') or $this->returnServerError('Error.');
$element = $html->find("article");
$article_count = 1;
$article_count = 1;
foreach($element as $article) {
$item = new \Item();
$item->uri = $article->parent->getAttribute("href");
if(strpos($item->uri, "http") === FALSE) {
$item->uri = "http://courrierinternational.fr/".$item->uri;
}
$page = $this->file_get_html($item->uri);
$page = $this->getSimpleHTMLDOM($item->uri);
$cleaner = new HTMLSanitizer();
$item->content = $cleaner->sanitize($page->find("div.article-text")[0]);
$item->title = strip_tags($article->find(".title")[0]);
$dateTime = date_parse($page->find("time")[0]);
$item->timestamp = mktime(
$dateTime['hour'],
$dateTime['minute'],
$dateTime['second'],
$dateTime['month'],
$dateTime['day'],
$dateTime['hour'],
$dateTime['minute'],
$dateTime['second'],
$dateTime['month'],
$dateTime['day'],
$dateTime['year']
);
$this->items[] = $item;
$article_count ++;
if($article_count > 5) break;
}