2016-09-05 18:05:19 +02:00
|
|
|
<?php
|
2022-07-01 15:10:30 +02:00
|
|
|
|
2018-11-16 21:48:59 +01:00
|
|
|
/**
 * Expands an existing feed.
 *
 * Base class for bridges that build their items from an already existing
 * RSS 1.0, RSS 2.0 or Atom 1.0 feed. The feed is fetched and parsed by
 * collectExpandableDatas(); individual entries are converted by parseItem(),
 * which subclasses may override to post-process each item.
 */
abstract class FeedExpander extends BridgeAbstract
{
    public const FEED_TYPE_RSS_1_0 = 'RSS_1_0';
    public const FEED_TYPE_RSS_2_0 = 'RSS_2_0';
    public const FEED_TYPE_ATOM_1_0 = 'ATOM_1_0';

    /** One of the FEED_TYPE_* constants; set by collectExpandableDatas(). */
    private string $feedType;

    private FeedParser $feedParser;

    /** Feed-level metadata returned by FeedParser::parseFeed(); set by collectExpandableDatas(). */
    private array $parsedFeed;

    public function __construct(CacheInterface $cache, Logger $logger)
    {
        parent::__construct($cache, $logger);
        $this->feedParser = new FeedParser();
    }

    /**
     * Fetches the feed at $url, detects its format and appends the parsed
     * entries to $this->items.
     *
     * @param string $url      Location of the feed to expand
     * @param int    $maxItems Stop once $this->items holds this many entries; -1 is replaced by 999
     *
     * @return $this
     *
     * @throws \Exception When $url is empty, when the response body is the empty string (code 10),
     *                    when the body is not parseable xml (code 11) or when the feed format
     *                    cannot be detected
     */
    public function collectExpandableDatas(string $url, $maxItems = -1)
    {
        if (!$url) {
            throw new \Exception('There is no $url for this RSS expander');
        }
        if ($maxItems === -1) {
            $maxItems = 999;
        }

        $accept = [MrssFormat::MIME_TYPE, AtomFormat::MIME_TYPE, '*/*'];
        $httpHeaders = ['Accept: ' . implode(', ', $accept)];

        // Notice we do not use cache here on purpose. We want a fresh view of the RSS stream each time
        $xmlString = getContents($url, $httpHeaders);
        if ($xmlString === '') {
            throw new \Exception(sprintf('Unable to parse xml from `%s` because we got the empty string', $url), 10);
        }

        // Maybe move this call earlier up the stack frames
        // Disable triggering of the php error-handler and handle errors manually instead.
        // The previous setting is remembered so that it can be restored on every exit path.
        $previousUseInternalErrors = libxml_use_internal_errors(true);
        try {
            // Consider replacing libxml with https://www.php.net/domdocument
            // Intentionally not using the silencing operator (@) because it has no effect here
            $xml = simplexml_load_string(trim($xmlString));
            if ($xml === false) {
                $xmlErrors = libxml_get_errors();
                foreach ($xmlErrors as $xmlError) {
                    Debug::log(trim($xmlError->message));
                }
                // Render only the first error into exception message
                $firstXmlErrorMessage = $xmlErrors ? $xmlErrors[0]->message : '';
                throw new \Exception(sprintf('Unable to parse xml from `%s` %s', $url, $firstXmlErrorMessage), 11);
            }
        } finally {
            // Drop the buffered errors and restore the previous behaviour in case other code
            // relies on it. This also runs when the parse failure above is thrown.
            libxml_clear_errors();
            libxml_use_internal_errors($previousUseInternalErrors);
        }

        // Currently only feed metadata (not items) are plucked out
        $this->parsedFeed = $this->feedParser->parseFeed($xmlString);

        // The feed format is detected from the first kind of child element found on the root node
        if (isset($xml->item[0])) {
            $this->feedType = self::FEED_TYPE_RSS_1_0;
            $items = $xml->item;
        } elseif (isset($xml->channel[0])) {
            $this->feedType = self::FEED_TYPE_RSS_2_0;
            $items = $xml->channel[0]->item;
        } elseif (isset($xml->entry[0])) {
            $this->feedType = self::FEED_TYPE_ATOM_1_0;
            $items = $xml->entry;
        } else {
            throw new \Exception(sprintf('Unable to detect feed format from `%s`', $url));
        }

        foreach ($items as $item) {
            $parsedItem = $this->parseItem($item);
            // A falsy result is not added to the collected items
            if ($parsedItem) {
                $this->items[] = $parsedItem;
            }
            if (count($this->items) >= $maxItems) {
                break;
            }
        }

        return $this;
    }

    /**
     * Converts a single feed entry using the parser that matches the detected feed type.
     *
     * @param \SimpleXMLElement $item The feed item to be parsed
     *
     * @return mixed Whatever the matching FeedParser method returns for the item
     *
     * @throws \Exception When the feed type is none of the FEED_TYPE_* constants
     */
    protected function parseItem($item)
    {
        // The feed type stays uninitialized until collectExpandableDatas() has detected it
        $feedType = $this->feedType ?? '(none)';

        switch ($feedType) {
            case self::FEED_TYPE_RSS_1_0:
                return $this->feedParser->parseRss1Item($item);
            case self::FEED_TYPE_RSS_2_0:
                return $this->feedParser->parseRss2Item($item);
            case self::FEED_TYPE_ATOM_1_0:
                return $this->feedParser->parseAtomItem($item);
            default:
                // Report the value that was actually switched on
                throw new \Exception(sprintf('Unknown version %s!', $feedType));
        }
    }

    /**
     * Returns the 'uri' entry of the parsed feed metadata, or the parent's URI when it is not set.
     */
    public function getURI()
    {
        return $this->parsedFeed['uri'] ?? parent::getURI();
    }

    /**
     * Returns the 'title' entry of the parsed feed metadata, or the parent's name when it is not set.
     */
    public function getName()
    {
        return $this->parsedFeed['title'] ?? parent::getName();
    }

    /**
     * Returns the 'icon' entry of the parsed feed metadata, or the parent's icon when it is not set.
     */
    public function getIcon()
    {
        return $this->parsedFeed['icon'] ?? parent::getIcon();
    }
}
|