mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2025-01-17 14:18:35 +01:00
052844f5e1
simplehtmldom 1.9 introduced new functions to recursively remove nodes from the DOM. This allows removing elements without the need to re-load the document by using $html->load($html->save()), which is very inefficient. Find more information about remove() at https://simplehtmldom.sourceforge.io/docs/1.9/api/simple_html_dom_node/remove/
64 lines
1.7 KiB
PHP
64 lines
1.7 KiB
PHP
<?php
|
|
/**
 * Bridge that scrapes the SIMAR website (see self::URI) for network status.
 *
 * Two kinds of feed items are produced:
 *  - "Rotura" items, one per <span> found inside the element with id "home";
 *  - "Intervenção" items, one per <a> found inside the element with id
 *    "menu1" (only when the "interventions" input is set).
 */
class SIMARBridge extends BridgeAbstract {

	const NAME = 'SIMAR';
	const URI = 'http://www.simar-louresodivelas.pt/';
	const DESCRIPTION = 'Verificar estado da rede SIMAR';
	const MAINTAINER = 'somini';
	const PARAMETERS = array(
		'Público' => array(
			'interventions' => array(
				'type' => 'checkbox',
				'name' => 'Incluir Intervenções?',
				'defaultValue' => 'checked',
			)
		)
	);

	/**
	 * Load the site front page and append the collected items to $this->items.
	 *
	 * returnServerError() is invoked when the page cannot be loaded or when an
	 * expected container ("#home", "#menu1") is missing. It is defined outside
	 * this file; presumably it aborts the request — confirm against the
	 * framework.
	 */
	public function collectData() {
		$html = getSimpleHTMLDOM(self::getURI())
			or returnServerError('Could not load content');

		$e_home = $html->find('#home', 0)
			or returnServerError('Invalid site structure');

		foreach($e_home->find('span') as $element) {
			$item = array();

			$item['title'] = 'Rotura: ' . $element->plaintext;
			$item['content'] = $element->innertext;
			/* These items carry no link, so the id is derived from the content */
			$item['uid'] = 'urn:sha1:' . hash('sha1', $item['content']);

			$this->items[] = $item;
		}

		if ($this->getInput('interventions')) {
			$e_main1 = $html->find('#menu1', 0)
				or returnServerError('Invalid site structure');

			foreach ($e_main1->find('a') as $element) {
				$item = array();

				$item['title'] = 'Intervenção: ' . $element->plaintext;
				$item['uri'] = self::getURI() . $element->href;
				/* Fallback content: the link text, replaced below when possible */
				$item['content'] = $element->innertext;

				/* Try to get the actual contents for this kind of item */
				$item_html = getSimpleHTMLDOMCached($item['uri']);
				if ($item_html) {
					$e_item = $item_html->find('.auto-style59', 0);

					/*
					 * find() with an index yields null when nothing matches, so
					 * the container must be checked before it is dereferenced.
					 * Without this guard a detail page lacking the element
					 * would abort the whole feed with a fatal error.
					 */
					if ($e_item) {
						foreach($e_item->find('p') as $paragraph) {
							/*
							 * Remove empty paragraphs.
							 * NOTE(review): the second alternative of this
							 * pattern shows up as plain whitespace here, which
							 * \W already covers — confirm against upstream
							 * whether an HTML entity was intended.
							 */
							if (preg_match('/^(\W| )+$/', $paragraph->innertext) === 1) {
								$paragraph->remove();
							}
						}

						$item['content'] = $e_item->innertext;
					}
				}

				$this->items[] = $item;
			}
		}
	}
}
|