mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2025-10-28 21:55:50 +01:00
feat: introduce convenience function get_sitemap (#4773)
* feat: introduce function get_sitemap Convenience function to fetch and parse xml sitemap from url * lint
This commit is contained in:
@@ -13,21 +13,11 @@ class CybernewsBridge extends BridgeAbstract
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$sitemapUrl = self::URI . '/news-sitemap.xml';
|
||||
$urls = get_sitemap('https://cybernews.com/news-sitemap.xml');
|
||||
|
||||
$sitemapXml = getContents($sitemapUrl);
|
||||
if (!$sitemapXml) {
|
||||
throwServerException('Unable to retrieve Cybernews sitemap');
|
||||
}
|
||||
|
||||
$sitemap = simplexml_load_string($sitemapXml, null, LIBXML_NOCDATA | LIBXML_NONET);
|
||||
if (!$sitemap) {
|
||||
throwServerException('Unable to parse Cybernews sitemap');
|
||||
}
|
||||
|
||||
foreach ($sitemap->url as $entry) {
|
||||
$url = trim((string) $entry->loc);
|
||||
$lastmod = trim((string) $entry->lastmod);
|
||||
foreach ($urls as $entry) {
|
||||
$url = $entry['loc'];
|
||||
$lastmod = $entry['lastmod'];
|
||||
|
||||
if (!$url) {
|
||||
continue;
|
||||
@@ -41,13 +31,12 @@ class CybernewsBridge extends BridgeAbstract
|
||||
// continue;
|
||||
// }
|
||||
|
||||
$namespaces = $entry->getNamespaces(true);
|
||||
$title = '';
|
||||
|
||||
if (isset($namespaces['news'])) {
|
||||
$news = $entry->children($namespaces['news'])->news;
|
||||
if (isset($entry['news'])) {
|
||||
$news = $entry['news'];
|
||||
if ($news) {
|
||||
$title = trim((string) $news->title);
|
||||
$title = trim((string) $news['title']);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -13,36 +13,25 @@ class InvestorsObserverBridge extends BridgeAbstract
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$sitemapXml = getContents(self::URI . '/news-sitemap.xml');
|
||||
$urls = get_sitemap(self::URI . '/news-sitemap.xml');
|
||||
|
||||
if (!$sitemapXml) {
|
||||
throwServerException('Unable to retrieve sitemap');
|
||||
}
|
||||
foreach ($urls as $entry) {
|
||||
$title = null;
|
||||
$pubDate = null;
|
||||
|
||||
$sitemap = simplexml_load_string($sitemapXml, null, LIBXML_NOCDATA | LIBXML_NONET);
|
||||
|
||||
if (!$sitemap) {
|
||||
throwServerException('Unable to parse sitemap');
|
||||
}
|
||||
|
||||
foreach ($sitemap->url as $entry) {
|
||||
$url = trim((string) $entry->loc);
|
||||
$lastmod = trim((string) $entry->lastmod);
|
||||
$url = trim((string) $entry['loc']);
|
||||
$lastmod = trim((string) $entry['lastmod']);
|
||||
|
||||
if (!$url) {
|
||||
continue;
|
||||
}
|
||||
|
||||
$namespaces = $entry->getNamespaces(true);
|
||||
$title = '';
|
||||
$pubDate = null;
|
||||
|
||||
if (isset($namespaces['news'])) {
|
||||
$news = $entry->children($namespaces['news'])->news;
|
||||
if (isset($entry['news'])) {
|
||||
$news = $entry['news'];
|
||||
|
||||
if ($news) {
|
||||
$title = trim((string) $news->title);
|
||||
$pubDate = trim((string) $news->publication_date);
|
||||
$title = trim((string) $news['title']);
|
||||
$pubDate = trim((string) $news['publication_date']);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -12,14 +12,11 @@ class SkyArteBridge extends BridgeAbstract
|
||||
|
||||
public function collectData()
|
||||
{
|
||||
$doc = new \DOMDocument();
|
||||
$doc->loadXML(getContents('https://arte.sky.it/sitemap-mostre-eventi.xml'));
|
||||
$urls = get_sitemap('https://arte.sky.it/sitemap-mostre-eventi.xml');
|
||||
|
||||
$count = 0;
|
||||
foreach ($doc->getElementsByTagName('url') as $url) {
|
||||
$loc = $url->getElementsByTagName('loc')->item(0)->nodeValue;
|
||||
|
||||
$lastmod = $url->getElementsByTagName('lastmod')->item(0)->nodeValue;
|
||||
foreach ($urls as $url) {
|
||||
$loc = $url['loc'];
|
||||
|
||||
if (!$loc) {
|
||||
continue;
|
||||
@@ -36,7 +33,7 @@ class SkyArteBridge extends BridgeAbstract
|
||||
'title' => $event['title'],
|
||||
'uri' => $loc,
|
||||
'uid' => $loc,
|
||||
'timestamp' => $lastmod,
|
||||
'timestamp' => $url['lastmod'],
|
||||
'content' => $event['content'],
|
||||
'categories' => $event['categories'],
|
||||
'enclosures' => $event['enclosures'],
|
||||
|
||||
@@ -342,3 +342,16 @@ Json::decode($json);
|
||||
```
|
||||
|
||||
[Defined in lib/utils.php](https://github.com/RSS-Bridge/rss-bridge/blob/master/lib/utils.php)
|
||||
|
||||
# get_sitemap(string $url): array
|
||||
|
||||
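Convenience function that fetches an XML sitemap and returns one array per `<url>` entry. Each array has the keys `loc`, `lastmod`, `changefreq` and `priority` (`null` when the element is absent); Google News sitemap elements, if present, are added under their namespace prefix (e.g. `$url['news']['title']`).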
|
||||
|
||||
```php
|
||||
$urls = get_sitemap('https://arte.sky.it/sitemap-mostre-eventi.xml');
|
||||
|
||||
foreach ($urls as $url) {
|
||||
$loc = $url['loc'];
|
||||
$lastmod = $url['lastmod'];
|
||||
}
|
||||
```
|
||||
|
||||
@@ -1,5 +1,30 @@
|
||||
<?php
|
||||
|
||||
/**
 * Fetch an XML sitemap and return its <url> entries as plain arrays.
 *
 * Every entry contains the keys 'loc', 'lastmod', 'changefreq' and 'priority';
 * a key is null when the corresponding child element is missing.
 *
 * Elements from the Google News sitemap namespace found inside an entry are
 * added as $item[<prefix>][<localName>] = <text content>, where <prefix> is
 * the namespace prefix exactly as written in the document (commonly 'news',
 * giving e.g. $item['news']['title']). All descendants in that namespace are
 * collected, so wrapper elements appear as well, with their concatenated text.
 *
 * @param string $url Url of the sitemap to fetch
 * @return array<int, array<string, mixed>> One array per <url> element, in document order
 * @throws \RuntimeException When the response is empty or is not well-formed XML
 */
function get_sitemap(string $url): array
{
    $xml = getContents($url);

    // Guard first: DOMDocument::loadXML() raises a ValueError on an empty
    // string, which would hide the real cause (nothing was retrieved).
    if (!is_string($xml) || trim($xml) === '') {
        throw new \RuntimeException(sprintf('Unable to retrieve sitemap: %s', $url));
    }

    $doc = new \DOMDocument();

    // Collect libxml errors internally so a malformed document is reported
    // through the exception below instead of PHP warnings; the previous
    // setting is restored because it is process-wide state.
    $previousSetting = libxml_use_internal_errors(true);
    try {
        // LIBXML_NONET: the document is untrusted remote input, so the parser
        // must not fetch external resources on its behalf.
        $loaded = $doc->loadXML($xml, LIBXML_NONET);
    } finally {
        libxml_clear_errors();
        libxml_use_internal_errors($previousSetting);
    }

    if ($loaded === false) {
        throw new \RuntimeException(sprintf('Unable to parse sitemap: %s', $url));
    }

    $urls = [];

    // Named $urlNode so the $url parameter is not overwritten by the loop.
    foreach ($doc->getElementsByTagName('url') as $urlNode) {
        $item = [];

        foreach (['loc', 'lastmod', 'changefreq', 'priority'] as $field) {
            // item(0) is null when the element is absent; ?? turns that into null.
            $item[$field] = $urlNode->getElementsByTagName($field)->item(0)->nodeValue ?? null;
        }

        $newsElements = $urlNode->getElementsByTagNameNS('http://www.google.com/schemas/sitemap-news/0.9', '*');
        foreach ($newsElements as $element) {
            $item[$element->prefix][$element->localName] = $element->nodeValue;
        }

        $urls[] = $item;
    }

    return $urls;
}
|
||||
|
||||
/**
|
||||
* Fetch data from an http url
|
||||
*
|
||||
|
||||
Reference in New Issue
Block a user