mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2025-01-16 21:58:21 +01:00
feat: preserve and reproduce podcast feeds (itunes rss module) (#3759)
This commit is contained in:
parent
408c2e5e91
commit
ef5bd83bd0
@ -50,7 +50,9 @@ class CssSelectorFeedExpanderBridge extends CssSelectorBridge
|
||||
$discard_thumbnail = $this->getInput('discard_thumbnail');
|
||||
$limit = $this->getInput('limit');
|
||||
|
||||
$source_feed = (new FeedParser())->parseFeed(getContents($url));
|
||||
$feedParser = new FeedParser();
|
||||
$xml = getContents($url);
|
||||
$source_feed = $feedParser->parseFeed($xml);
|
||||
$items = $source_feed['items'];
|
||||
|
||||
// Map Homepage URL (Default: Root page)
|
||||
|
@ -62,52 +62,27 @@ class NyaaTorrentsBridge extends BridgeAbstract
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
// Manually parsing because we need to access the nyaa namespace in the xml
|
||||
$xml = simplexml_load_string(getContents($this->getURI()));
|
||||
$channel = $xml->channel[0];
|
||||
$feed = [];
|
||||
$feed['title'] = trim((string)$channel->title);
|
||||
$feed['uri'] = trim((string)$channel->link);
|
||||
if (!empty($channel->image)) {
|
||||
$feed['icon'] = trim((string)$channel->image->url);
|
||||
}
|
||||
$items = $xml->channel[0]->item;
|
||||
foreach ($items as $feedItem) {
|
||||
$item = [
|
||||
'title' => (string) $feedItem->title,
|
||||
'uri' => (string) $feedItem->link,
|
||||
];
|
||||
|
||||
$feedParser = new FeedParser();
|
||||
$feed = $feedParser->parseFeed(getContents($this->getURI()));
|
||||
|
||||
foreach ($feed['items'] as $item) {
|
||||
$item['id'] = str_replace(['https://nyaa.si/download/', '.torrent'], '', $item['uri']);
|
||||
|
||||
$nyaaNamespace = (array)($feedItem->children('nyaa', true));
|
||||
$item = array_merge($item, $nyaaNamespace);
|
||||
|
||||
// Convert URI from torrent file to web page
|
||||
$item['uri'] = str_replace('/download/', '/view/', $item['uri']);
|
||||
$item['uri'] = str_replace('.torrent', '', $item['uri']);
|
||||
|
||||
$item_html = getSimpleHTMLDOMCached($item['uri']);
|
||||
if ($item_html) {
|
||||
// Retrieve full description from page contents
|
||||
$item_desc = str_get_html(
|
||||
markdownToHtml(html_entity_decode($item_html->find('#torrent-description', 0)->innertext))
|
||||
);
|
||||
|
||||
// Retrieve image for thumbnail or generic logo fallback
|
||||
$dom = getSimpleHTMLDOMCached($item['uri']);
|
||||
if ($dom) {
|
||||
$description = $dom->find('#torrent-description', 0)->innertext ?? '';
|
||||
$itemDom = str_get_html(markdownToHtml(html_entity_decode($description)));
|
||||
$item_image = $this->getURI() . 'static/img/avatar/default.png';
|
||||
foreach ($item_desc->find('img') as $img) {
|
||||
foreach ($itemDom->find('img') as $img) {
|
||||
if (strpos($img->src, 'prez') === false) {
|
||||
$item_image = $img->src;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
$item['enclosures'] = [$item_image];
|
||||
$item['content'] = $item_desc;
|
||||
$item['content'] = (string) $itemDom;
|
||||
}
|
||||
|
||||
$this->items[] = $item;
|
||||
if (count($this->items) >= 10) {
|
||||
break;
|
||||
|
@ -28,6 +28,7 @@
|
||||
"ext-openssl": "*",
|
||||
"ext-libxml": "*",
|
||||
"ext-simplexml": "*",
|
||||
"ext-dom": "*",
|
||||
"ext-json": "*"
|
||||
},
|
||||
"require-dev": {
|
||||
@ -38,8 +39,7 @@
|
||||
"ext-memcached": "Allows to use memcached as cache type",
|
||||
"ext-sqlite3": "Allows to use an SQLite database for caching",
|
||||
"ext-zip": "Required for FDroidRepoBridge",
|
||||
"ext-intl": "Required for OLXBridge",
|
||||
"ext-dom": "Allows to use some bridges based on XPath expressions"
|
||||
"ext-intl": "Required for OLXBridge"
|
||||
},
|
||||
"autoload-dev": {
|
||||
"psr-4": {
|
||||
|
@ -16,6 +16,8 @@ class AtomFormat extends FormatAbstract
|
||||
|
||||
public function stringify()
|
||||
{
|
||||
$document = new \DomDocument('1.0', $this->getCharset());
|
||||
|
||||
$feedUrl = get_current_url();
|
||||
|
||||
$extraInfos = $this->getExtraInfos();
|
||||
@ -25,7 +27,6 @@ class AtomFormat extends FormatAbstract
|
||||
$uri = $extraInfos['uri'];
|
||||
}
|
||||
|
||||
$document = new \DomDocument('1.0', $this->getCharset());
|
||||
$document->formatOutput = true;
|
||||
$feed = $document->createElementNS(self::ATOM_NS, 'feed');
|
||||
$document->appendChild($feed);
|
||||
@ -81,6 +82,7 @@ class AtomFormat extends FormatAbstract
|
||||
$linkSelf->setAttribute('href', $feedUrl);
|
||||
|
||||
foreach ($this->getItems() as $item) {
|
||||
$itemArray = $item->toArray();
|
||||
$entryTimestamp = $item->getTimestamp();
|
||||
$entryTitle = $item->getTitle();
|
||||
$entryContent = $item->getContent();
|
||||
@ -138,7 +140,19 @@ class AtomFormat extends FormatAbstract
|
||||
$entry->appendChild($id);
|
||||
$id->appendChild($document->createTextNode($entryID));
|
||||
|
||||
if (!empty($entryUri)) {
|
||||
if (isset($itemArray['itunes'])) {
|
||||
$feed->setAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns:itunes', self::ITUNES_NS);
|
||||
foreach ($itemArray['itunes'] as $itunesKey => $itunesValue) {
|
||||
$itunesProperty = $document->createElementNS(self::ITUNES_NS, $itunesKey);
|
||||
$entry->appendChild($itunesProperty);
|
||||
$itunesProperty->appendChild($document->createTextNode($itunesValue));
|
||||
}
|
||||
$itunesEnclosure = $document->createElement('enclosure');
|
||||
$entry->appendChild($itunesEnclosure);
|
||||
$itunesEnclosure->setAttribute('url', $itemArray['enclosure']['url']);
|
||||
$itunesEnclosure->setAttribute('length', $itemArray['enclosure']['length']);
|
||||
$itunesEnclosure->setAttribute('type', $itemArray['enclosure']['type']);
|
||||
} elseif (!empty($entryUri)) {
|
||||
$entryLinkAlternate = $document->createElement('link');
|
||||
$entry->appendChild($entryLinkAlternate);
|
||||
$entryLinkAlternate->setAttribute('rel', 'alternate');
|
||||
|
@ -34,6 +34,8 @@ class MrssFormat extends FormatAbstract
|
||||
|
||||
public function stringify()
|
||||
{
|
||||
$document = new \DomDocument('1.0', $this->getCharset());
|
||||
|
||||
$feedUrl = get_current_url();
|
||||
$extraInfos = $this->getExtraInfos();
|
||||
if (empty($extraInfos['uri'])) {
|
||||
@ -42,7 +44,6 @@ class MrssFormat extends FormatAbstract
|
||||
$uri = $extraInfos['uri'];
|
||||
}
|
||||
|
||||
$document = new \DomDocument('1.0', $this->getCharset());
|
||||
$document->formatOutput = true;
|
||||
$feed = $document->createElement('rss');
|
||||
$document->appendChild($feed);
|
||||
@ -99,22 +100,23 @@ class MrssFormat extends FormatAbstract
|
||||
$linkSelf->setAttribute('href', $feedUrl);
|
||||
|
||||
foreach ($this->getItems() as $item) {
|
||||
$itemArray = $item->toArray();
|
||||
$itemTimestamp = $item->getTimestamp();
|
||||
$itemTitle = $item->getTitle();
|
||||
$itemUri = $item->getURI();
|
||||
$itemContent = $item->getContent() ? break_annoying_html_tags($item->getContent()) : '';
|
||||
$entryID = $item->getUid();
|
||||
$itemUid = $item->getUid();
|
||||
$isPermaLink = 'false';
|
||||
|
||||
if (empty($entryID) && !empty($itemUri)) {
|
||||
if (empty($itemUid) && !empty($itemUri)) {
|
||||
// Fallback to provided URI
|
||||
$entryID = $itemUri;
|
||||
$itemUid = $itemUri;
|
||||
$isPermaLink = 'true';
|
||||
}
|
||||
|
||||
if (empty($entryID)) {
|
||||
if (empty($itemUid)) {
|
||||
// Fallback to title and content
|
||||
$entryID = hash('sha1', $itemTitle . $itemContent);
|
||||
$itemUid = hash('sha1', $itemTitle . $itemContent);
|
||||
}
|
||||
|
||||
$entry = $document->createElement('item');
|
||||
@ -126,7 +128,19 @@ class MrssFormat extends FormatAbstract
|
||||
$entryTitle->appendChild($document->createTextNode($itemTitle));
|
||||
}
|
||||
|
||||
if (!empty($itemUri)) {
|
||||
if (isset($itemArray['itunes'])) {
|
||||
$feed->setAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns:itunes', self::ITUNES_NS);
|
||||
foreach ($itemArray['itunes'] as $itunesKey => $itunesValue) {
|
||||
$itunesProperty = $document->createElementNS(self::ITUNES_NS, $itunesKey);
|
||||
$entry->appendChild($itunesProperty);
|
||||
$itunesProperty->appendChild($document->createTextNode($itunesValue));
|
||||
}
|
||||
$itunesEnclosure = $document->createElement('enclosure');
|
||||
$entry->appendChild($itunesEnclosure);
|
||||
$itunesEnclosure->setAttribute('url', $itemArray['enclosure']['url']);
|
||||
$itunesEnclosure->setAttribute('length', $itemArray['enclosure']['length']);
|
||||
$itunesEnclosure->setAttribute('type', $itemArray['enclosure']['type']);
|
||||
} if (!empty($itemUri)) {
|
||||
$entryLink = $document->createElement('link');
|
||||
$entry->appendChild($entryLink);
|
||||
$entryLink->appendChild($document->createTextNode($itemUri));
|
||||
@ -135,7 +149,7 @@ class MrssFormat extends FormatAbstract
|
||||
$entryGuid = $document->createElement('guid');
|
||||
$entryGuid->setAttribute('isPermaLink', $isPermaLink);
|
||||
$entry->appendChild($entryGuid);
|
||||
$entryGuid->appendChild($document->createTextNode($entryID));
|
||||
$entryGuid->appendChild($document->createTextNode($itemUid));
|
||||
|
||||
if (!empty($itemTimestamp)) {
|
||||
$entryPublished = $document->createElement('pubDate');
|
||||
|
@ -3,11 +3,13 @@
|
||||
declare(strict_types=1);
|
||||
|
||||
/**
|
||||
* Very basic and naive feed parser that scrapes out rss 0.91, 1.0, 2.0 and atom 1.0.
|
||||
* Very basic and naive feed parser.
|
||||
*
|
||||
* Emit arrays meant to be used inside rss-bridge.
|
||||
* Scrapes out rss 0.91, 1.0, 2.0 and atom 1.0.
|
||||
*
|
||||
* The feed item structure is identical to that of FeedItem
|
||||
* Produce arrays meant to be used inside rss-bridge.
|
||||
*
|
||||
* The item structure is tweaked so that it works with FeedItem
|
||||
*/
|
||||
final class FeedParser
|
||||
{
|
||||
@ -85,9 +87,7 @@ final class FeedParser
|
||||
|
||||
public function parseAtomItem(\SimpleXMLElement $feedItem): array
|
||||
{
|
||||
// Some ATOM entries also contain RSS 2.0 fields
|
||||
$item = $this->parseRss2Item($feedItem);
|
||||
|
||||
if (isset($feedItem->id)) {
|
||||
$item['uri'] = (string)$feedItem->id;
|
||||
}
|
||||
@ -131,8 +131,35 @@ final class FeedParser
|
||||
|
||||
public function parseRss2Item(\SimpleXMLElement $feedItem): array
|
||||
{
|
||||
// Primary data is compatible with 0.91, with some additional data
|
||||
$item = $this->parseRss091Item($feedItem);
|
||||
$item = [
|
||||
'uri' => '',
|
||||
'title' => '',
|
||||
'content' => '',
|
||||
'timestamp' => '',
|
||||
'author' => '',
|
||||
//'uid' => null,
|
||||
//'categories' => [],
|
||||
//'enclosures' => [],
|
||||
];
|
||||
|
||||
foreach ($feedItem as $k => $v) {
|
||||
$hasChildren = count($v) !== 0;
|
||||
if (!$hasChildren) {
|
||||
$item[$k] = (string) $v;
|
||||
}
|
||||
}
|
||||
|
||||
if (isset($feedItem->link)) {
|
||||
// todo: trim uri
|
||||
$item['uri'] = (string)$feedItem->link;
|
||||
}
|
||||
if (isset($feedItem->title)) {
|
||||
$item['title'] = html_entity_decode((string)$feedItem->title);
|
||||
}
|
||||
if (isset($feedItem->description)) {
|
||||
$item['content'] = (string)$feedItem->description;
|
||||
}
|
||||
|
||||
$namespaces = $feedItem->getNamespaces(true);
|
||||
if (isset($namespaces['dc'])) {
|
||||
$dc = $feedItem->children($namespaces['dc']);
|
||||
@ -140,7 +167,24 @@ final class FeedParser
|
||||
if (isset($namespaces['media'])) {
|
||||
$media = $feedItem->children($namespaces['media']);
|
||||
}
|
||||
|
||||
foreach ($namespaces as $namespaceName => $namespaceUrl) {
|
||||
if (in_array($namespaceName, ['', 'content', 'media'])) {
|
||||
continue;
|
||||
}
|
||||
$module = $feedItem->children($namespaceUrl);
|
||||
$item[$namespaceName] = [];
|
||||
foreach ($module as $moduleKey => $moduleValue) {
|
||||
$item[$namespaceName][$moduleKey] = (string) $moduleValue;
|
||||
}
|
||||
}
|
||||
if (isset($namespaces['itunes'])) {
|
||||
$enclosure = $feedItem->enclosure;
|
||||
$item['enclosure'] = [
|
||||
'url' => (string) $enclosure['url'],
|
||||
'length' => (string) $enclosure['length'],
|
||||
'type' => (string) $enclosure['type'],
|
||||
];
|
||||
}
|
||||
if (isset($feedItem->guid)) {
|
||||
// Pluck out a url from guid
|
||||
foreach ($feedItem->guid->attributes() as $attribute => $value) {
|
||||
@ -185,8 +229,26 @@ final class FeedParser
|
||||
|
||||
public function parseRss1Item(\SimpleXMLElement $feedItem): array
|
||||
{
|
||||
// 1.0 adds optional elements around the 0.91 standard
|
||||
$item = $this->parseRss091Item($feedItem);
|
||||
$item = [
|
||||
'uri' => '',
|
||||
'title' => '',
|
||||
'content' => '',
|
||||
'timestamp' => '',
|
||||
'author' => '',
|
||||
//'uid' => null,
|
||||
//'categories' => [],
|
||||
//'enclosures' => [],
|
||||
];
|
||||
if (isset($feedItem->link)) {
|
||||
// todo: trim uri
|
||||
$item['uri'] = (string)$feedItem->link;
|
||||
}
|
||||
if (isset($feedItem->title)) {
|
||||
$item['title'] = html_entity_decode((string)$feedItem->title);
|
||||
}
|
||||
if (isset($feedItem->description)) {
|
||||
$item['content'] = (string)$feedItem->description;
|
||||
}
|
||||
$namespaces = $feedItem->getNamespaces(true);
|
||||
if (isset($namespaces['dc'])) {
|
||||
$dc = $feedItem->children($namespaces['dc']);
|
||||
@ -199,32 +261,4 @@ final class FeedParser
|
||||
}
|
||||
return $item;
|
||||
}
|
||||
|
||||
public function parseRss091Item(\SimpleXMLElement $feedItem): array
|
||||
{
|
||||
$item = [
|
||||
'uri' => null,
|
||||
'title' => null,
|
||||
'content' => null,
|
||||
'timestamp' => null,
|
||||
'author' => null,
|
||||
//'uid' => null,
|
||||
//'categories' => [],
|
||||
//'enclosures' => [],
|
||||
];
|
||||
if (isset($feedItem->link)) {
|
||||
// todo: trim uri
|
||||
$item['uri'] = (string)$feedItem->link;
|
||||
}
|
||||
if (isset($feedItem->title)) {
|
||||
$item['title'] = html_entity_decode((string)$feedItem->title);
|
||||
}
|
||||
// rss 0.91 doesn't support timestamps
|
||||
// rss 0.91 doesn't support authors
|
||||
// rss 0.91 doesn't support enclosures
|
||||
if (isset($feedItem->description)) {
|
||||
$item['content'] = (string)$feedItem->description;
|
||||
}
|
||||
return $item;
|
||||
}
|
||||
}
|
||||
|
@ -2,6 +2,8 @@
|
||||
|
||||
abstract class FormatAbstract
|
||||
{
|
||||
public const ITUNES_NS = 'http://www.itunes.com/dtds/podcast-1.0.dtd';
|
||||
|
||||
const MIME_TYPE = 'text/plain';
|
||||
|
||||
protected string $charset = 'UTF-8';
|
||||
|
Loading…
x
Reference in New Issue
Block a user