mirror of
https://github.com/RSS-Bridge/rss-bridge.git
synced 2025-07-31 13:50:23 +02:00
[BridgeXPathAbstract + BlizzardNewsBridge + XPathBridge] Add new abstract class + two example implementations (#1671)
This commit is contained in:
251
bridges/XPathBridge.php
Normal file
251
bridges/XPathBridge.php
Normal file
@@ -0,0 +1,251 @@
|
||||
<?php
|
||||
|
||||
/**
 * Generic bridge that builds a feed from any web page.
 *
 * Every XPath expression handed to the parent class comes from user input
 * (see PARAMETERS) instead of being hard-coded in the bridge.
 */
class XPathBridge extends XPathAbstract {
	// Bridge metadata constants.
	const NAME = 'XPathBridge';
	const URI = 'https://github.com/rss-bridge/rss-bridge';
	const DESCRIPTION
		= 'Parse any webpage using <a href="https://devhints.io/xpath" target="_blank">XPath expressions</a>';
	const MAINTAINER = 'Niehztog';

	// One unnamed context holding the source URL, one XPath expression per
	// feed item field and the encoding switch. The heredoc bodies and their
	// closing markers stay at column 0 so the help texts carry no indentation.
	const PARAMETERS = array(
		'' => array(

			'url' => array(
				'name' => 'Enter web page URL',
				'title' => <<<"EOL"
You can specify any website URL which serves data suited for display in RSS feeds
(for example a news blog).
EOL
				, 'type' => 'text',
				'exampleValue' => 'https://news.blizzard.com/en-en',
				'defaultValue' => 'https://news.blizzard.com/en-en',
				'required' => true
			),

			'item' => array(
				'name' => 'Item selector',
				'title' => <<<"EOL"
Enter an XPath expression matching a list of dom nodes, each node containing one
feed article item in total (usually a surrounding <div> or <span> tag). This will
be the context nodes for all of the following expressions. This expression usually
starts with a single forward slash.
EOL
				, 'type' => 'text',
				'exampleValue' => '/html/body/div/div[4]/div[2]/div[2]/div/div/section/ol/li/article',
				'defaultValue' => '/html/body/div/div[4]/div[2]/div[2]/div/div/section/ol/li/article',
				'required' => true
			),

			'title' => array(
				'name' => 'Item title selector',
				'title' => <<<"EOL"
This expression should match a node contained within each article item node
containing the article headline. It should start with a dot followed by two
forward slashes, referring to any descendant nodes of the article item node.
EOL
				, 'type' => 'text',
				'exampleValue' => './/div/div[2]/h2',
				'defaultValue' => './/div/div[2]/h2',
				'required' => true
			),

			'content' => array(
				'name' => 'Item description selector',
				'title' => <<<"EOL"
This expression should match a node contained within each article item node
containing the article content or description. It should start with a dot
followed by two forward slashes, referring to any descendant nodes of the
article item node.
EOL
				, 'type' => 'text',
				'exampleValue' => './/div[@class="ArticleListItem-description"]/div[@class="h6"]',
				'defaultValue' => './/div[@class="ArticleListItem-description"]/div[@class="h6"]',
				'required' => false
			),

			'uri' => array(
				'name' => 'Item URL selector',
				'title' => <<<"EOL"
This expression should match a node's attribute containing the article URL
(usually the href attribute of an <a> tag). It should start with a dot
followed by two forward slashes, referring to any descendant nodes of
the article item node. Attributes can be selected by prepending an @ char
before the attributes name.
EOL
				, 'type' => 'text',
				'exampleValue' => './/a[@class="ArticleLink ArticleLink"]/@href',
				'defaultValue' => './/a[@class="ArticleLink ArticleLink"]/@href',
				'required' => false
			),

			'author' => array(
				'name' => 'Item author selector',
				'title' => <<<"EOL"
This expression should match a node contained within each article item
node containing the article author's name. It should start with a dot
followed by two forward slashes, referring to any descendant nodes of
the article item node.
EOL
				, 'type' => 'text',
				'required' => false
			),

			'timestamp' => array(
				'name' => 'Item date selector',
				'title' => <<<"EOL"
This expression should match a node or node's attribute containing the
article timestamp or date (parsable by PHP's strtotime function). It
should start with a dot followed by two forward slashes, referring to
any descendant nodes of the article item node. Attributes can be
selected by prepending an @ char before the attributes name.
EOL
				, 'type' => 'text',
				'exampleValue' => './/time[@class="ArticleListItem-footerTimestamp"]/@timestamp',
				'defaultValue' => './/time[@class="ArticleListItem-footerTimestamp"]/@timestamp',
				'required' => false
			),

			'enclosures' => array(
				'name' => 'Item image selector',
				'title' => <<<"EOL"
This expression should match a node's attribute containing an article
image URL (usually the src attribute of an <img> tag or a style
attribute). It should start with a dot followed by two forward slashes,
referring to any descendant nodes of the article item node. Attributes
can be selected by prepending an @ char before the attributes name.
EOL
				, 'type' => 'text',
				'exampleValue' => './/div[@class="ArticleListItem-image"]/@style',
				'defaultValue' => './/div[@class="ArticleListItem-image"]/@style',
				'required' => false
			),

			'categories' => array(
				'name' => 'Item category selector',
				'title' => <<<"EOL"
This expression should match a node or node's attribute contained
within each article item node containing the article category. This
could be inside <div> or <span> tags or sometimes be hidden
in a data attribute. It should start with a dot followed by two
forward slashes, referring to any descendant nodes of the article
item node. Attributes can be selected by prepending an @ char
before the attributes name.
EOL
				, 'type' => 'text',
				'exampleValue' => './/div[@class="ArticleListItem-label"]',
				'defaultValue' => './/div[@class="ArticleListItem-label"]',
				'required' => false
			),

			'fix_encoding' => array(
				'name' => 'Fix encoding',
				'title' => <<<"EOL"
Check this to fix feed encoding by invoking PHP's utf8_decode
function on all extracted texts. Try this in case you see "broken" or
"weird" characters in your feed where you'd normally expect umlauts
or any other non-ascii characters.
EOL
				, 'type' => 'checkbox',
				'required' => false
			),

		)
	);

	/**
	 * Source Web page URL (should provide either HTML or XML content).
	 *
	 * The 'url' input is passed through encodeUri() before it is returned.
	 *
	 * @return string
	 */
	protected function getSourceUrl() {
		return $this->encodeUri($this->getInput('url'));
	}

	/**
	 * XPath expression for extracting the feed items from the source page
	 * @return string URL-decoded value of the 'item' input
	 */
	protected function getExpressionItem() {
		return $this->decodeExpressionInput('item');
	}

	/**
	 * XPath expression for extracting an item title from the item context
	 * @return string URL-decoded value of the 'title' input
	 */
	protected function getExpressionItemTitle() {
		return $this->decodeExpressionInput('title');
	}

	/**
	 * XPath expression for extracting an item's content from the item context
	 * @return string URL-decoded value of the 'content' input
	 */
	protected function getExpressionItemContent() {
		return $this->decodeExpressionInput('content');
	}

	/**
	 * XPath expression for extracting an item link from the item context
	 * @return string URL-decoded value of the 'uri' input
	 */
	protected function getExpressionItemUri() {
		return $this->decodeExpressionInput('uri');
	}

	/**
	 * XPath expression for extracting an item author from the item context
	 * @return string URL-decoded value of the 'author' input
	 */
	protected function getExpressionItemAuthor() {
		return $this->decodeExpressionInput('author');
	}

	/**
	 * XPath expression for extracting an item timestamp from the item context
	 * @return string URL-decoded value of the 'timestamp' input
	 */
	protected function getExpressionItemTimestamp() {
		return $this->decodeExpressionInput('timestamp');
	}

	/**
	 * XPath expression for extracting item enclosures (media content like
	 * images or movies) from the item context
	 * @return string URL-decoded value of the 'enclosures' input
	 */
	protected function getExpressionItemEnclosures() {
		return $this->decodeExpressionInput('enclosures');
	}

	/**
	 * XPath expression for extracting an item category from the item context
	 * @return string URL-decoded value of the 'categories' input
	 */
	protected function getExpressionItemCategories() {
		return $this->decodeExpressionInput('categories');
	}

	/**
	 * Fix encoding
	 *
	 * @return mixed value of the 'fix_encoding' checkbox input, exactly as
	 *               getInput() hands it out
	 */
	protected function getSettingFixEncoding() {
		return $this->getInput('fix_encoding');
	}

	/**
	 * Reads one user input and URL-decodes it.
	 *
	 * The value is cast to string before decoding. Most expression inputs are
	 * optional; assuming getInput() yields null for an absent one (TODO
	 * confirm), the cast keeps the result an empty string while no longer
	 * passing null to urldecode(), which is deprecated as of PHP 8.1.
	 *
	 * NOTE(review): urldecode() also converts '+' to a space and resolves any
	 * %XX sequence. Confirm the inputs really arrive still URL-encoded,
	 * otherwise expressions containing '+' or '%' get altered here.
	 *
	 * @param string $inputName key of the input inside PARAMETERS
	 * @return string
	 */
	private function decodeExpressionInput($inputName) {
		return urldecode((string)$this->getInput($inputName));
	}

	/**
	 * Fixes URL encoding issues in input URLs.
	 *
	 * A URL whose scheme separator is still percent-encoded (it starts with
	 * "http%3A%2F%2F" or "https%3A%2F%2F") is URL-decoded as a whole. The
	 * prefix is compared case-insensitively because the hex digits of a
	 * percent escape may be written in either case. Afterwards every pipe
	 * character is replaced by its escape "%7C".
	 *
	 * @param string $uri
	 * @return string
	 */
	private function encodeUri($uri) {
		// Cast once so a missing value is handled as an empty string below.
		$uri = (string)$uri;

		if (stripos($uri, 'https%3A%2F%2F') === 0
			|| stripos($uri, 'http%3A%2F%2F') === 0) {
			$uri = urldecode($uri);
		}

		return str_replace('|', '%7C', $uri);
	}
}
|
Reference in New Issue
Block a user