1
0
mirror of https://github.com/RSS-Bridge/rss-bridge.git synced 2025-10-16 17:16:19 +02:00

[XPathBridge] Catch null values and improvements

- By adding null coalescing fallbacks, we avoid any errors that might
  occur when functions return a null value.
- Add function return types.
- More consistently use single quote characters.
This commit is contained in:
Mynacol
2025-09-04 08:36:00 +00:00
parent e8446cb513
commit 525e5ddb1d

View File

@@ -12,10 +12,11 @@ class XPathBridge extends XPathAbstract
'url' => [ 'url' => [
'name' => 'Enter web page URL', 'name' => 'Enter web page URL',
'title' => <<<"EOL" 'title' => <<<'EOL'
You can specify any website URL which serves data suited for display in RSS feeds You can specify any website URL which serves data suited for display in RSS feeds
(for example a news blog). (for example a news blog).
EOL, 'type' => 'text', EOL,
'type' => 'text',
'exampleValue' => 'https://news.blizzard.com/en-en', 'exampleValue' => 'https://news.blizzard.com/en-en',
'defaultValue' => 'https://news.blizzard.com/en-en', 'defaultValue' => 'https://news.blizzard.com/en-en',
'required' => true 'required' => true
@@ -23,12 +24,13 @@ EOL, 'type' => 'text',
'item' => [ 'item' => [
'name' => 'Item selector', 'name' => 'Item selector',
'title' => <<<"EOL" 'title' => <<<'EOL'
Enter an XPath expression matching a list of dom nodes, each node containing one Enter an XPath expression matching a list of dom nodes, each node containing one
feed article item in total (usually a surrounding &lt;div&gt; or &lt;span&gt; tag). This will feed article item in total (usually a surrounding &lt;div&gt; or &lt;span&gt; tag). This will
be the context nodes for all of the following expressions. This expression usually be the context nodes for all of the following expressions. This expression usually
starts with a single forward slash. starts with a single forward slash.
EOL, 'type' => 'text', EOL,
'type' => 'text',
'exampleValue' => '/html/body/div/div[4]/div[2]/div[2]/div/div/section/ol/li/article', 'exampleValue' => '/html/body/div/div[4]/div[2]/div[2]/div/div/section/ol/li/article',
'defaultValue' => '/html/body/div/div[4]/div[2]/div[2]/div/div/section/ol/li/article', 'defaultValue' => '/html/body/div/div[4]/div[2]/div[2]/div/div/section/ol/li/article',
'required' => true 'required' => true
@@ -36,11 +38,12 @@ EOL, 'type' => 'text',
'title' => [ 'title' => [
'name' => 'Item title selector', 'name' => 'Item title selector',
'title' => <<<"EOL" 'title' => <<<'EOL'
This expression should match a node contained within each article item node This expression should match a node contained within each article item node
containing the article headline. It should start with a dot followed by two containing the article headline. It should start with a dot followed by two
forward slashes, referring to any descendant nodes of the article item node. forward slashes, referring to any descendant nodes of the article item node.
EOL, 'type' => 'text', EOL,
'type' => 'text',
'exampleValue' => './/div/div[2]/h2', 'exampleValue' => './/div/div[2]/h2',
'defaultValue' => './/div/div[2]/h2', 'defaultValue' => './/div/div[2]/h2',
'required' => true 'required' => true
@@ -48,12 +51,13 @@ EOL, 'type' => 'text',
'content' => [ 'content' => [
'name' => 'Item description selector', 'name' => 'Item description selector',
'title' => <<<"EOL" 'title' => <<<'EOL'
This expression should match a node contained within each article item node This expression should match a node contained within each article item node
containing the article content or description. It should start with a dot containing the article content or description. It should start with a dot
followed by two forward slashes, referring to any descendant nodes of the followed by two forward slashes, referring to any descendant nodes of the
article item node. article item node.
EOL, 'type' => 'text', EOL,
'type' => 'text',
'exampleValue' => './/div[@class="ArticleListItem-description"]/div[@class="h6"]', 'exampleValue' => './/div[@class="ArticleListItem-description"]/div[@class="h6"]',
'defaultValue' => './/div[@class="ArticleListItem-description"]/div[@class="h6"]', 'defaultValue' => './/div[@class="ArticleListItem-description"]/div[@class="h6"]',
'required' => false 'required' => false
@@ -61,7 +65,7 @@ EOL, 'type' => 'text',
'raw_content' => [ 'raw_content' => [
'name' => 'Use raw item description', 'name' => 'Use raw item description',
'title' => <<<"EOL" 'title' => <<<'EOL'
Whether to use the raw item description or to replace certain characters with Whether to use the raw item description or to replace certain characters with
special significance in HTML by HTML entities (using the PHP function htmlspecialchars). special significance in HTML by HTML entities (using the PHP function htmlspecialchars).
EOL, EOL,
@@ -72,13 +76,14 @@ EOL, 'type' => 'text',
'uri' => [ 'uri' => [
'name' => 'Item URL selector', 'name' => 'Item URL selector',
'title' => <<<"EOL" 'title' => <<<'EOL'
This expression should match a node's attribute containing the article URL This expression should match a node's attribute containing the article URL
(usually the href attribute of an &lt;a&gt; tag). It should start with a dot (usually the href attribute of an &lt;a&gt; tag). It should start with a dot
followed by two forward slashes, referring to any descendant nodes of followed by two forward slashes, referring to any descendant nodes of
the article item node. Attributes can be selected by prepending an @ char the article item node. Attributes can be selected by prepending an @ char
before the attributes name. before the attributes name.
EOL, 'type' => 'text', EOL,
'type' => 'text',
'exampleValue' => './/a[@class="ArticleLink ArticleLink"]/@href', 'exampleValue' => './/a[@class="ArticleLink ArticleLink"]/@href',
'defaultValue' => './/a[@class="ArticleLink ArticleLink"]/@href', 'defaultValue' => './/a[@class="ArticleLink ArticleLink"]/@href',
'required' => false 'required' => false
@@ -86,24 +91,26 @@ EOL, 'type' => 'text',
'author' => [ 'author' => [
'name' => 'Item author selector', 'name' => 'Item author selector',
'title' => <<<"EOL" 'title' => <<<'EOL'
This expression should match a node contained within each article item This expression should match a node contained within each article item
node containing the article author's name. It should start with a dot node containing the article author's name. It should start with a dot
followed by two forward slashes, referring to any descendant nodes of followed by two forward slashes, referring to any descendant nodes of
the article item node. the article item node.
EOL, 'type' => 'text', EOL,
'type' => 'text',
'required' => false 'required' => false
], ],
'timestamp' => [ 'timestamp' => [
'name' => 'Item date selector', 'name' => 'Item date selector',
'title' => <<<"EOL" 'title' => <<<'EOL'
This expression should match a node or node's attribute containing the This expression should match a node or node's attribute containing the
article timestamp or date (parsable by PHP's strtotime function). It article timestamp or date (parsable by PHP's strtotime function). It
should start with a dot followed by two forward slashes, referring to should start with a dot followed by two forward slashes, referring to
any descendant nodes of the article item node. Attributes can be any descendant nodes of the article item node. Attributes can be
selected by prepending an @ char before the attributes name. selected by prepending an @ char before the attributes name.
EOL, 'type' => 'text', EOL,
'type' => 'text',
'exampleValue' => './/time[@class="ArticleListItem-footerTimestamp"]/@timestamp', 'exampleValue' => './/time[@class="ArticleListItem-footerTimestamp"]/@timestamp',
'defaultValue' => './/time[@class="ArticleListItem-footerTimestamp"]/@timestamp', 'defaultValue' => './/time[@class="ArticleListItem-footerTimestamp"]/@timestamp',
'required' => false 'required' => false
@@ -111,13 +118,14 @@ EOL, 'type' => 'text',
'enclosures' => [ 'enclosures' => [
'name' => 'Item image selector', 'name' => 'Item image selector',
'title' => <<<"EOL" 'title' => <<<'EOL'
This expression should match a node's attribute containing an article This expression should match a node's attribute containing an article
image URL (usually the src attribute of an &lt;img&gt; tag or a style image URL (usually the src attribute of an &lt;img&gt; tag or a style
attribute). It should start with a dot followed by two forward slashes, attribute). It should start with a dot followed by two forward slashes,
referring to any descendant nodes of the article item node. Attributes referring to any descendant nodes of the article item node. Attributes
can be selected by prepending an @ char before the attributes name. can be selected by prepending an @ char before the attributes name.
EOL, 'type' => 'text', EOL,
'type' => 'text',
'exampleValue' => './/div[@class="ArticleListItem-image"]/@style', 'exampleValue' => './/div[@class="ArticleListItem-image"]/@style',
'defaultValue' => './/div[@class="ArticleListItem-image"]/@style', 'defaultValue' => './/div[@class="ArticleListItem-image"]/@style',
'required' => false 'required' => false
@@ -125,7 +133,7 @@ EOL, 'type' => 'text',
'categories' => [ 'categories' => [
'name' => 'Item category selector', 'name' => 'Item category selector',
'title' => <<<"EOL" 'title' => <<<'EOL'
This expression should match a node or node's attribute contained This expression should match a node or node's attribute contained
within each article item node containing the article category. This within each article item node containing the article category. This
could be inside &lt;div&gt; or &lt;span&gt; tags or sometimes be hidden could be inside &lt;div&gt; or &lt;span&gt; tags or sometimes be hidden
@@ -133,7 +141,8 @@ in a data attribute. It should start with a dot followed by two
forward slashes, referring to any descendant nodes of the article forward slashes, referring to any descendant nodes of the article
item node. Attributes can be selected by prepending an @ char item node. Attributes can be selected by prepending an @ char
before the attributes name. before the attributes name.
EOL, 'type' => 'text', EOL,
'type' => 'text',
'exampleValue' => './/div[@class="ArticleListItem-label"]', 'exampleValue' => './/div[@class="ArticleListItem-label"]',
'defaultValue' => './/div[@class="ArticleListItem-label"]', 'defaultValue' => './/div[@class="ArticleListItem-label"]',
'required' => false 'required' => false
@@ -141,12 +150,13 @@ EOL, 'type' => 'text',
'fix_encoding' => [ 'fix_encoding' => [
'name' => 'Fix encoding', 'name' => 'Fix encoding',
'title' => <<<"EOL" 'title' => <<<'EOL'
Check this to fix feed encoding by invoking PHP's utf8_decode Check this to fix feed encoding by invoking PHP's utf8_decode
function on all extracted texts. Try this in case you see "broken" or function on all extracted texts. Try this in case you see "broken" or
"weird" characters in your feed where you'd normally expect umlauts "weird" characters in your feed where you'd normally expect umlauts
or any other non-ascii characters. or any other non-ascii characters.
EOL, 'type' => 'checkbox', EOL,
'type' => 'checkbox',
'required' => false 'required' => false
], ],
@@ -157,36 +167,36 @@ EOL, 'type' => 'checkbox',
* Source Web page URL (should provide either HTML or XML content) * Source Web page URL (should provide either HTML or XML content)
* @return string * @return string
*/ */
protected function getSourceUrl() protected function getSourceUrl(): string
{ {
return $this->encodeUri($this->getInput('url')); return $this->encodeUri($this->getInput('url') ?? '');
} }
/** /**
* XPath expression for extracting the feed items from the source page * XPath expression for extracting the feed items from the source page
* @return string * @return string
*/ */
protected function getExpressionItem() protected function getExpressionItem(): string
{ {
return urldecode($this->getInput('item')); return urldecode($this->getInput('item') ?? '');
} }
/** /**
* XPath expression for extracting an item title from the item context * XPath expression for extracting an item title from the item context
* @return string * @return string
*/ */
protected function getExpressionItemTitle() protected function getExpressionItemTitle(): string
{ {
return urldecode($this->getInput('title')); return urldecode($this->getInput('title') ?? '');
} }
/** /**
* XPath expression for extracting an item's content from the item context * XPath expression for extracting an item's content from the item context
* @return string * @return string
*/ */
protected function getExpressionItemContent() protected function getExpressionItemContent(): string
{ {
return urldecode($this->getInput('content')); return urldecode($this->getInput('content') ?? '');
} }
/** /**
@@ -202,27 +212,27 @@ EOL, 'type' => 'checkbox',
* XPath expression for extracting an item link from the item context * XPath expression for extracting an item link from the item context
* @return string * @return string
*/ */
protected function getExpressionItemUri() protected function getExpressionItemUri(): string
{ {
return urldecode($this->getInput('uri')); return urldecode($this->getInput('uri') ?? '');
} }
/** /**
* XPath expression for extracting an item author from the item context * XPath expression for extracting an item author from the item context
* @return string * @return string
*/ */
protected function getExpressionItemAuthor() protected function getExpressionItemAuthor(): string
{ {
return urldecode($this->getInput('author')); return urldecode($this->getInput('author') ?? '');
} }
/** /**
* XPath expression for extracting an item timestamp from the item context * XPath expression for extracting an item timestamp from the item context
* @return string * @return string
*/ */
protected function getExpressionItemTimestamp() protected function getExpressionItemTimestamp(): string
{ {
return urldecode($this->getInput('timestamp')); return urldecode($this->getInput('timestamp') ?? '');
} }
/** /**
@@ -230,18 +240,18 @@ EOL, 'type' => 'checkbox',
* images or movies) from the item context * images or movies) from the item context
* @return string * @return string
*/ */
protected function getExpressionItemEnclosures() protected function getExpressionItemEnclosures(): string
{ {
return urldecode($this->getInput('enclosures')); return urldecode($this->getInput('enclosures') ?? '');
} }
/** /**
* XPath expression for extracting an item category from the item context * XPath expression for extracting an item category from the item context
* @return string * @return string
*/ */
protected function getExpressionItemCategories() protected function getExpressionItemCategories(): string
{ {
return urldecode($this->getInput('categories')); return urldecode($this->getInput('categories') ?? '');
} }
/** /**
@@ -260,6 +270,7 @@ EOL, 'type' => 'checkbox',
*/ */
private function encodeUri($uri) private function encodeUri($uri)
{ {
$uri = $uri ?? '';
if ( if (
strpos($uri, 'https%3A%2F%2F') === 0 strpos($uri, 'https%3A%2F%2F') === 0
|| strpos($uri, 'http%3A%2F%2F') === 0 || strpos($uri, 'http%3A%2F%2F') === 0