mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2025-01-17 14:18:35 +01:00
feat: preserve and reproduce podcast feeds (itunes rss module) (#3759)
This commit is contained in:
parent
408c2e5e91
commit
ef5bd83bd0
@ -50,7 +50,9 @@ class CssSelectorFeedExpanderBridge extends CssSelectorBridge
|
|||||||
$discard_thumbnail = $this->getInput('discard_thumbnail');
|
$discard_thumbnail = $this->getInput('discard_thumbnail');
|
||||||
$limit = $this->getInput('limit');
|
$limit = $this->getInput('limit');
|
||||||
|
|
||||||
$source_feed = (new FeedParser())->parseFeed(getContents($url));
|
$feedParser = new FeedParser();
|
||||||
|
$xml = getContents($url);
|
||||||
|
$source_feed = $feedParser->parseFeed($xml);
|
||||||
$items = $source_feed['items'];
|
$items = $source_feed['items'];
|
||||||
|
|
||||||
// Map Homepage URL (Default: Root page)
|
// Map Homepage URL (Default: Root page)
|
||||||
|
@ -62,52 +62,27 @@ class NyaaTorrentsBridge extends BridgeAbstract
|
|||||||
|
|
||||||
public function collectData()
|
public function collectData()
|
||||||
{
|
{
|
||||||
// Manually parsing because we need to acccess the nyaa namespace in the xml
|
$feedParser = new FeedParser();
|
||||||
$xml = simplexml_load_string(getContents($this->getURI()));
|
$feed = $feedParser->parseFeed(getContents($this->getURI()));
|
||||||
$channel = $xml->channel[0];
|
|
||||||
$feed = [];
|
|
||||||
$feed['title'] = trim((string)$channel->title);
|
|
||||||
$feed['uri'] = trim((string)$channel->link);
|
|
||||||
if (!empty($channel->image)) {
|
|
||||||
$feed['icon'] = trim((string)$channel->image->url);
|
|
||||||
}
|
|
||||||
$items = $xml->channel[0]->item;
|
|
||||||
foreach ($items as $feedItem) {
|
|
||||||
$item = [
|
|
||||||
'title' => (string) $feedItem->title,
|
|
||||||
'uri' => (string) $feedItem->link,
|
|
||||||
];
|
|
||||||
|
|
||||||
|
|
||||||
|
foreach ($feed['items'] as $item) {
|
||||||
$item['id'] = str_replace(['https://nyaa.si/download/', '.torrent'], '', $item['uri']);
|
$item['id'] = str_replace(['https://nyaa.si/download/', '.torrent'], '', $item['uri']);
|
||||||
|
|
||||||
$nyaaNamespace = (array)($feedItem->children('nyaa', true));
|
|
||||||
$item = array_merge($item, $nyaaNamespace);
|
|
||||||
|
|
||||||
// Convert URI from torrent file to web page
|
|
||||||
$item['uri'] = str_replace('/download/', '/view/', $item['uri']);
|
$item['uri'] = str_replace('/download/', '/view/', $item['uri']);
|
||||||
$item['uri'] = str_replace('.torrent', '', $item['uri']);
|
$item['uri'] = str_replace('.torrent', '', $item['uri']);
|
||||||
|
$dom = getSimpleHTMLDOMCached($item['uri']);
|
||||||
$item_html = getSimpleHTMLDOMCached($item['uri']);
|
if ($dom) {
|
||||||
if ($item_html) {
|
$description = $dom->find('#torrent-description', 0)->innertext ?? '';
|
||||||
// Retrieve full description from page contents
|
$itemDom = str_get_html(markdownToHtml(html_entity_decode($description)));
|
||||||
$item_desc = str_get_html(
|
|
||||||
markdownToHtml(html_entity_decode($item_html->find('#torrent-description', 0)->innertext))
|
|
||||||
);
|
|
||||||
|
|
||||||
// Retrieve image for thumbnail or generic logo fallback
|
|
||||||
$item_image = $this->getURI() . 'static/img/avatar/default.png';
|
$item_image = $this->getURI() . 'static/img/avatar/default.png';
|
||||||
foreach ($item_desc->find('img') as $img) {
|
foreach ($itemDom->find('img') as $img) {
|
||||||
if (strpos($img->src, 'prez') === false) {
|
if (strpos($img->src, 'prez') === false) {
|
||||||
$item_image = $img->src;
|
$item_image = $img->src;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
$item['enclosures'] = [$item_image];
|
$item['enclosures'] = [$item_image];
|
||||||
$item['content'] = $item_desc;
|
$item['content'] = (string) $itemDom;
|
||||||
}
|
}
|
||||||
|
|
||||||
$this->items[] = $item;
|
$this->items[] = $item;
|
||||||
if (count($this->items) >= 10) {
|
if (count($this->items) >= 10) {
|
||||||
break;
|
break;
|
||||||
|
@ -28,6 +28,7 @@
|
|||||||
"ext-openssl": "*",
|
"ext-openssl": "*",
|
||||||
"ext-libxml": "*",
|
"ext-libxml": "*",
|
||||||
"ext-simplexml": "*",
|
"ext-simplexml": "*",
|
||||||
|
"ext-dom": "*",
|
||||||
"ext-json": "*"
|
"ext-json": "*"
|
||||||
},
|
},
|
||||||
"require-dev": {
|
"require-dev": {
|
||||||
@ -38,8 +39,7 @@
|
|||||||
"ext-memcached": "Allows to use memcached as cache type",
|
"ext-memcached": "Allows to use memcached as cache type",
|
||||||
"ext-sqlite3": "Allows to use an SQLite database for caching",
|
"ext-sqlite3": "Allows to use an SQLite database for caching",
|
||||||
"ext-zip": "Required for FDroidRepoBridge",
|
"ext-zip": "Required for FDroidRepoBridge",
|
||||||
"ext-intl": "Required for OLXBridge",
|
"ext-intl": "Required for OLXBridge"
|
||||||
"ext-dom": "Allows to use some bridges based on XPath expressions"
|
|
||||||
},
|
},
|
||||||
"autoload-dev": {
|
"autoload-dev": {
|
||||||
"psr-4": {
|
"psr-4": {
|
||||||
|
@ -16,6 +16,8 @@ class AtomFormat extends FormatAbstract
|
|||||||
|
|
||||||
public function stringify()
|
public function stringify()
|
||||||
{
|
{
|
||||||
|
$document = new \DomDocument('1.0', $this->getCharset());
|
||||||
|
|
||||||
$feedUrl = get_current_url();
|
$feedUrl = get_current_url();
|
||||||
|
|
||||||
$extraInfos = $this->getExtraInfos();
|
$extraInfos = $this->getExtraInfos();
|
||||||
@ -25,7 +27,6 @@ class AtomFormat extends FormatAbstract
|
|||||||
$uri = $extraInfos['uri'];
|
$uri = $extraInfos['uri'];
|
||||||
}
|
}
|
||||||
|
|
||||||
$document = new \DomDocument('1.0', $this->getCharset());
|
|
||||||
$document->formatOutput = true;
|
$document->formatOutput = true;
|
||||||
$feed = $document->createElementNS(self::ATOM_NS, 'feed');
|
$feed = $document->createElementNS(self::ATOM_NS, 'feed');
|
||||||
$document->appendChild($feed);
|
$document->appendChild($feed);
|
||||||
@ -81,6 +82,7 @@ class AtomFormat extends FormatAbstract
|
|||||||
$linkSelf->setAttribute('href', $feedUrl);
|
$linkSelf->setAttribute('href', $feedUrl);
|
||||||
|
|
||||||
foreach ($this->getItems() as $item) {
|
foreach ($this->getItems() as $item) {
|
||||||
|
$itemArray = $item->toArray();
|
||||||
$entryTimestamp = $item->getTimestamp();
|
$entryTimestamp = $item->getTimestamp();
|
||||||
$entryTitle = $item->getTitle();
|
$entryTitle = $item->getTitle();
|
||||||
$entryContent = $item->getContent();
|
$entryContent = $item->getContent();
|
||||||
@ -138,7 +140,19 @@ class AtomFormat extends FormatAbstract
|
|||||||
$entry->appendChild($id);
|
$entry->appendChild($id);
|
||||||
$id->appendChild($document->createTextNode($entryID));
|
$id->appendChild($document->createTextNode($entryID));
|
||||||
|
|
||||||
if (!empty($entryUri)) {
|
if (isset($itemArray['itunes'])) {
|
||||||
|
$feed->setAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns:itunes', self::ITUNES_NS);
|
||||||
|
foreach ($itemArray['itunes'] as $itunesKey => $itunesValue) {
|
||||||
|
$itunesProperty = $document->createElementNS(self::ITUNES_NS, $itunesKey);
|
||||||
|
$entry->appendChild($itunesProperty);
|
||||||
|
$itunesProperty->appendChild($document->createTextNode($itunesValue));
|
||||||
|
}
|
||||||
|
$itunesEnclosure = $document->createElement('enclosure');
|
||||||
|
$entry->appendChild($itunesEnclosure);
|
||||||
|
$itunesEnclosure->setAttribute('url', $itemArray['enclosure']['url']);
|
||||||
|
$itunesEnclosure->setAttribute('length', $itemArray['enclosure']['length']);
|
||||||
|
$itunesEnclosure->setAttribute('type', $itemArray['enclosure']['type']);
|
||||||
|
} elseif (!empty($entryUri)) {
|
||||||
$entryLinkAlternate = $document->createElement('link');
|
$entryLinkAlternate = $document->createElement('link');
|
||||||
$entry->appendChild($entryLinkAlternate);
|
$entry->appendChild($entryLinkAlternate);
|
||||||
$entryLinkAlternate->setAttribute('rel', 'alternate');
|
$entryLinkAlternate->setAttribute('rel', 'alternate');
|
||||||
|
@ -34,6 +34,8 @@ class MrssFormat extends FormatAbstract
|
|||||||
|
|
||||||
public function stringify()
|
public function stringify()
|
||||||
{
|
{
|
||||||
|
$document = new \DomDocument('1.0', $this->getCharset());
|
||||||
|
|
||||||
$feedUrl = get_current_url();
|
$feedUrl = get_current_url();
|
||||||
$extraInfos = $this->getExtraInfos();
|
$extraInfos = $this->getExtraInfos();
|
||||||
if (empty($extraInfos['uri'])) {
|
if (empty($extraInfos['uri'])) {
|
||||||
@ -42,7 +44,6 @@ class MrssFormat extends FormatAbstract
|
|||||||
$uri = $extraInfos['uri'];
|
$uri = $extraInfos['uri'];
|
||||||
}
|
}
|
||||||
|
|
||||||
$document = new \DomDocument('1.0', $this->getCharset());
|
|
||||||
$document->formatOutput = true;
|
$document->formatOutput = true;
|
||||||
$feed = $document->createElement('rss');
|
$feed = $document->createElement('rss');
|
||||||
$document->appendChild($feed);
|
$document->appendChild($feed);
|
||||||
@ -99,22 +100,23 @@ class MrssFormat extends FormatAbstract
|
|||||||
$linkSelf->setAttribute('href', $feedUrl);
|
$linkSelf->setAttribute('href', $feedUrl);
|
||||||
|
|
||||||
foreach ($this->getItems() as $item) {
|
foreach ($this->getItems() as $item) {
|
||||||
|
$itemArray = $item->toArray();
|
||||||
$itemTimestamp = $item->getTimestamp();
|
$itemTimestamp = $item->getTimestamp();
|
||||||
$itemTitle = $item->getTitle();
|
$itemTitle = $item->getTitle();
|
||||||
$itemUri = $item->getURI();
|
$itemUri = $item->getURI();
|
||||||
$itemContent = $item->getContent() ? break_annoying_html_tags($item->getContent()) : '';
|
$itemContent = $item->getContent() ? break_annoying_html_tags($item->getContent()) : '';
|
||||||
$entryID = $item->getUid();
|
$itemUid = $item->getUid();
|
||||||
$isPermaLink = 'false';
|
$isPermaLink = 'false';
|
||||||
|
|
||||||
if (empty($entryID) && !empty($itemUri)) {
|
if (empty($itemUid) && !empty($itemUri)) {
|
||||||
// Fallback to provided URI
|
// Fallback to provided URI
|
||||||
$entryID = $itemUri;
|
$itemUid = $itemUri;
|
||||||
$isPermaLink = 'true';
|
$isPermaLink = 'true';
|
||||||
}
|
}
|
||||||
|
|
||||||
if (empty($entryID)) {
|
if (empty($itemUid)) {
|
||||||
// Fallback to title and content
|
// Fallback to title and content
|
||||||
$entryID = hash('sha1', $itemTitle . $itemContent);
|
$itemUid = hash('sha1', $itemTitle . $itemContent);
|
||||||
}
|
}
|
||||||
|
|
||||||
$entry = $document->createElement('item');
|
$entry = $document->createElement('item');
|
||||||
@ -126,7 +128,19 @@ class MrssFormat extends FormatAbstract
|
|||||||
$entryTitle->appendChild($document->createTextNode($itemTitle));
|
$entryTitle->appendChild($document->createTextNode($itemTitle));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!empty($itemUri)) {
|
if (isset($itemArray['itunes'])) {
|
||||||
|
$feed->setAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns:itunes', self::ITUNES_NS);
|
||||||
|
foreach ($itemArray['itunes'] as $itunesKey => $itunesValue) {
|
||||||
|
$itunesProperty = $document->createElementNS(self::ITUNES_NS, $itunesKey);
|
||||||
|
$entry->appendChild($itunesProperty);
|
||||||
|
$itunesProperty->appendChild($document->createTextNode($itunesValue));
|
||||||
|
}
|
||||||
|
$itunesEnclosure = $document->createElement('enclosure');
|
||||||
|
$entry->appendChild($itunesEnclosure);
|
||||||
|
$itunesEnclosure->setAttribute('url', $itemArray['enclosure']['url']);
|
||||||
|
$itunesEnclosure->setAttribute('length', $itemArray['enclosure']['length']);
|
||||||
|
$itunesEnclosure->setAttribute('type', $itemArray['enclosure']['type']);
|
||||||
|
} if (!empty($itemUri)) {
|
||||||
$entryLink = $document->createElement('link');
|
$entryLink = $document->createElement('link');
|
||||||
$entry->appendChild($entryLink);
|
$entry->appendChild($entryLink);
|
||||||
$entryLink->appendChild($document->createTextNode($itemUri));
|
$entryLink->appendChild($document->createTextNode($itemUri));
|
||||||
@ -135,7 +149,7 @@ class MrssFormat extends FormatAbstract
|
|||||||
$entryGuid = $document->createElement('guid');
|
$entryGuid = $document->createElement('guid');
|
||||||
$entryGuid->setAttribute('isPermaLink', $isPermaLink);
|
$entryGuid->setAttribute('isPermaLink', $isPermaLink);
|
||||||
$entry->appendChild($entryGuid);
|
$entry->appendChild($entryGuid);
|
||||||
$entryGuid->appendChild($document->createTextNode($entryID));
|
$entryGuid->appendChild($document->createTextNode($itemUid));
|
||||||
|
|
||||||
if (!empty($itemTimestamp)) {
|
if (!empty($itemTimestamp)) {
|
||||||
$entryPublished = $document->createElement('pubDate');
|
$entryPublished = $document->createElement('pubDate');
|
||||||
|
@ -3,11 +3,13 @@
|
|||||||
declare(strict_types=1);
|
declare(strict_types=1);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Very basic and naive feed parser that srapes out rss 0.91, 1.0, 2.0 and atom 1.0.
|
* Very basic and naive feed parser.
|
||||||
*
|
*
|
||||||
* Emit arrays meant to be used inside rss-bridge.
|
* Scrapes out rss 0.91, 1.0, 2.0 and atom 1.0.
|
||||||
*
|
*
|
||||||
* The feed item structure is identical to that of FeedItem
|
* Produce arrays meant to be used inside rss-bridge.
|
||||||
|
*
|
||||||
|
* The item structure is tweaked so that works with FeedItem
|
||||||
*/
|
*/
|
||||||
final class FeedParser
|
final class FeedParser
|
||||||
{
|
{
|
||||||
@ -85,9 +87,7 @@ final class FeedParser
|
|||||||
|
|
||||||
public function parseAtomItem(\SimpleXMLElement $feedItem): array
|
public function parseAtomItem(\SimpleXMLElement $feedItem): array
|
||||||
{
|
{
|
||||||
// Some ATOM entries also contain RSS 2.0 fields
|
|
||||||
$item = $this->parseRss2Item($feedItem);
|
$item = $this->parseRss2Item($feedItem);
|
||||||
|
|
||||||
if (isset($feedItem->id)) {
|
if (isset($feedItem->id)) {
|
||||||
$item['uri'] = (string)$feedItem->id;
|
$item['uri'] = (string)$feedItem->id;
|
||||||
}
|
}
|
||||||
@ -131,8 +131,35 @@ final class FeedParser
|
|||||||
|
|
||||||
public function parseRss2Item(\SimpleXMLElement $feedItem): array
|
public function parseRss2Item(\SimpleXMLElement $feedItem): array
|
||||||
{
|
{
|
||||||
// Primary data is compatible to 0.91 with some additional data
|
$item = [
|
||||||
$item = $this->parseRss091Item($feedItem);
|
'uri' => '',
|
||||||
|
'title' => '',
|
||||||
|
'content' => '',
|
||||||
|
'timestamp' => '',
|
||||||
|
'author' => '',
|
||||||
|
//'uid' => null,
|
||||||
|
//'categories' => [],
|
||||||
|
//'enclosures' => [],
|
||||||
|
];
|
||||||
|
|
||||||
|
foreach ($feedItem as $k => $v) {
|
||||||
|
$hasChildren = count($v) !== 0;
|
||||||
|
if (!$hasChildren) {
|
||||||
|
$item[$k] = (string) $v;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isset($feedItem->link)) {
|
||||||
|
// todo: trim uri
|
||||||
|
$item['uri'] = (string)$feedItem->link;
|
||||||
|
}
|
||||||
|
if (isset($feedItem->title)) {
|
||||||
|
$item['title'] = html_entity_decode((string)$feedItem->title);
|
||||||
|
}
|
||||||
|
if (isset($feedItem->description)) {
|
||||||
|
$item['content'] = (string)$feedItem->description;
|
||||||
|
}
|
||||||
|
|
||||||
$namespaces = $feedItem->getNamespaces(true);
|
$namespaces = $feedItem->getNamespaces(true);
|
||||||
if (isset($namespaces['dc'])) {
|
if (isset($namespaces['dc'])) {
|
||||||
$dc = $feedItem->children($namespaces['dc']);
|
$dc = $feedItem->children($namespaces['dc']);
|
||||||
@ -140,7 +167,24 @@ final class FeedParser
|
|||||||
if (isset($namespaces['media'])) {
|
if (isset($namespaces['media'])) {
|
||||||
$media = $feedItem->children($namespaces['media']);
|
$media = $feedItem->children($namespaces['media']);
|
||||||
}
|
}
|
||||||
|
foreach ($namespaces as $namespaceName => $namespaceUrl) {
|
||||||
|
if (in_array($namespaceName, ['', 'content', 'media'])) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
$module = $feedItem->children($namespaceUrl);
|
||||||
|
$item[$namespaceName] = [];
|
||||||
|
foreach ($module as $moduleKey => $moduleValue) {
|
||||||
|
$item[$namespaceName][$moduleKey] = (string) $moduleValue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (isset($namespaces['itunes'])) {
|
||||||
|
$enclosure = $feedItem->enclosure;
|
||||||
|
$item['enclosure'] = [
|
||||||
|
'url' => (string) $enclosure['url'],
|
||||||
|
'length' => (string) $enclosure['length'],
|
||||||
|
'type' => (string) $enclosure['type'],
|
||||||
|
];
|
||||||
|
}
|
||||||
if (isset($feedItem->guid)) {
|
if (isset($feedItem->guid)) {
|
||||||
// Pluck out a url from guid
|
// Pluck out a url from guid
|
||||||
foreach ($feedItem->guid->attributes() as $attribute => $value) {
|
foreach ($feedItem->guid->attributes() as $attribute => $value) {
|
||||||
@ -185,8 +229,26 @@ final class FeedParser
|
|||||||
|
|
||||||
public function parseRss1Item(\SimpleXMLElement $feedItem): array
|
public function parseRss1Item(\SimpleXMLElement $feedItem): array
|
||||||
{
|
{
|
||||||
// 1.0 adds optional elements around the 0.91 standard
|
$item = [
|
||||||
$item = $this->parseRss091Item($feedItem);
|
'uri' => '',
|
||||||
|
'title' => '',
|
||||||
|
'content' => '',
|
||||||
|
'timestamp' => '',
|
||||||
|
'author' => '',
|
||||||
|
//'uid' => null,
|
||||||
|
//'categories' => [],
|
||||||
|
//'enclosures' => [],
|
||||||
|
];
|
||||||
|
if (isset($feedItem->link)) {
|
||||||
|
// todo: trim uri
|
||||||
|
$item['uri'] = (string)$feedItem->link;
|
||||||
|
}
|
||||||
|
if (isset($feedItem->title)) {
|
||||||
|
$item['title'] = html_entity_decode((string)$feedItem->title);
|
||||||
|
}
|
||||||
|
if (isset($feedItem->description)) {
|
||||||
|
$item['content'] = (string)$feedItem->description;
|
||||||
|
}
|
||||||
$namespaces = $feedItem->getNamespaces(true);
|
$namespaces = $feedItem->getNamespaces(true);
|
||||||
if (isset($namespaces['dc'])) {
|
if (isset($namespaces['dc'])) {
|
||||||
$dc = $feedItem->children($namespaces['dc']);
|
$dc = $feedItem->children($namespaces['dc']);
|
||||||
@ -199,32 +261,4 @@ final class FeedParser
|
|||||||
}
|
}
|
||||||
return $item;
|
return $item;
|
||||||
}
|
}
|
||||||
|
|
||||||
public function parseRss091Item(\SimpleXMLElement $feedItem): array
|
|
||||||
{
|
|
||||||
$item = [
|
|
||||||
'uri' => null,
|
|
||||||
'title' => null,
|
|
||||||
'content' => null,
|
|
||||||
'timestamp' => null,
|
|
||||||
'author' => null,
|
|
||||||
//'uid' => null,
|
|
||||||
//'categories' => [],
|
|
||||||
//'enclosures' => [],
|
|
||||||
];
|
|
||||||
if (isset($feedItem->link)) {
|
|
||||||
// todo: trim uri
|
|
||||||
$item['uri'] = (string)$feedItem->link;
|
|
||||||
}
|
|
||||||
if (isset($feedItem->title)) {
|
|
||||||
$item['title'] = html_entity_decode((string)$feedItem->title);
|
|
||||||
}
|
|
||||||
// rss 0.91 doesn't support timestamps
|
|
||||||
// rss 0.91 doesn't support authors
|
|
||||||
// rss 0.91 doesn't support enclosures
|
|
||||||
if (isset($feedItem->description)) {
|
|
||||||
$item['content'] = (string)$feedItem->description;
|
|
||||||
}
|
|
||||||
return $item;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
@ -2,6 +2,8 @@
|
|||||||
|
|
||||||
abstract class FormatAbstract
|
abstract class FormatAbstract
|
||||||
{
|
{
|
||||||
|
public const ITUNES_NS = 'http://www.itunes.com/dtds/podcast-1.0.dtd';
|
||||||
|
|
||||||
const MIME_TYPE = 'text/plain';
|
const MIME_TYPE = 'text/plain';
|
||||||
|
|
||||||
protected string $charset = 'UTF-8';
|
protected string $charset = 'UTF-8';
|
||||||
|
Loading…
x
Reference in New Issue
Block a user