2018-09-13 16:52:26 +05:00
|
|
|
|
<?php
|
2022-07-01 15:10:30 +02:00
|
|
|
|
|
2018-09-13 16:52:26 +05:00
|
|
|
|
/**
 * Bridge for pikabu.ru.
 *
 * Builds a feed from the posts listed on a tag page, a community page or a
 * user page, depending on which input context is filled in.
 */
class PikabuBridge extends BridgeAbstract
{
    const NAME = 'Пикабу';
    const URI = 'https://pikabu.ru';
    const DESCRIPTION = 'Выводит посты по тегу, сообществу или пользователю';
    const MAINTAINER = 'em92';

    /**
     * Definition of the "filter" list input shared by the tag and community
     * contexts ('hot' or 'new', defaulting to 'hot').
     */
    const PARAMETERS_FILTER = [
        'name' => 'Фильтр',
        'type' => 'list',
        'values' => [
            'Горячее' => 'hot',
            'Свежее' => 'new',
        ],
        'defaultValue' => 'hot',
    ];

    /**
     * Input contexts: by tag, by community, by user.
     */
    const PARAMETERS = [
        'По тегу' => [
            'tag' => [
                'name' => 'Тег',
                'exampleValue' => 'it',
                'required' => true,
            ],
            'filter' => self::PARAMETERS_FILTER,
        ],
        'По сообществу' => [
            'community' => [
                'name' => 'Сообщество',
                'exampleValue' => 'linux',
                'required' => true,
            ],
            'filter' => self::PARAMETERS_FILTER,
        ],
        'По пользователю' => [
            'user' => [
                'name' => 'Пользователь',
                'exampleValue' => 'admin',
                'required' => true,
            ],
        ],
    ];

    /**
     * Content of the fetched page's <title> element; null until collectData()
     * has found one.
     *
     * @var string|null
     */
    protected $title = null;

    /**
     * Builds the page URL for the active input.
     *
     * The 'tag', 'user' and 'community' inputs are checked in that order; when
     * none of them is set the parent implementation is used.
     *
     * @return string
     */
    public function getURI()
    {
        $filter = (string) $this->getInput('filter');

        if ($this->getInput('tag')) {
            return self::URI . '/tag/' . rawurlencode($this->getInput('tag')) . '/' . rawurlencode($filter);
        }

        if ($this->getInput('user')) {
            return self::URI . '/@' . rawurlencode($this->getInput('user'));
        }

        if ($this->getInput('community')) {
            $uri = self::URI . '/community/' . rawurlencode($this->getInput('community'));
            // 'hot' gets no path segment; an empty filter must not produce a
            // dangling trailing slash either.
            if ($filter !== '' && $filter !== 'hot') {
                $uri .= '/' . rawurlencode($filter);
            }
            return $uri;
        }

        return parent::getURI();
    }

    /**
     * Returns the URL of the icon used for this bridge.
     *
     * @return string
     */
    public function getIcon()
    {
        return 'https://cs.pikabu.ru/assets/favicon.ico';
    }

    /**
     * Returns the feed name: the fetched page title followed by the parent's
     * name, or just the parent's name when no page title is known yet.
     *
     * @return string
     */
    public function getName()
    {
        if (is_null($this->title)) {
            return parent::getName();
        }

        return $this->title . ' - ' . parent::getName();
    }

    /**
     * Fetches the page returned by getURI(), stores its title and appends one
     * feed item per usable post to $this->items.
     *
     * @throws \Exception when the page cannot be converted to UTF-8 or parsed
     */
    public function collectData()
    {
        $link = $this->getURI();

        // The fetched markup is converted from windows-1251 to UTF-8 before parsing.
        $text_html = iconv('windows-1251', 'utf-8', getContents($link));
        if ($text_html === false) {
            throw new \Exception('Unable to convert page to UTF-8: ' . $link);
        }

        $html = str_get_html($text_html);
        if (!$html) {
            throw new \Exception('Unable to parse page: ' . $link);
        }

        $page_title = $html->find('title', 0);
        if (!is_null($page_title)) {
            $this->title = $page_title->innertext;
        }

        foreach ($html->find('article.story') as $post) {
            $item = $this->parsePost($post);
            if (!is_null($item)) {
                $this->items[] = $item;
            }
        }
    }

    /**
     * Converts one 'article.story' element into a feed item.
     *
     * @param object $post parsed DOM node of the post
     * @return array|null the item, or null when the post has to be skipped
     *                    (no timestamp, no title link, no content, or sponsored)
     */
    private function parsePost($post)
    {
        $time = $post->find('time.story__datetime', 0);
        if (is_null($time)) {
            return null;
        }

        $title_element = $post->find('.story__title-link', 0);
        if (is_null($title_element)) {
            return null;
        }

        // skip sponsored posts (checked before any cleanup work is spent on them)
        if (str_contains((string) $title_element->href, 'from=cpm')) {
            return null;
        }

        $this->cleanPostMarkup($post);

        $content_element = $post->find('.story__content-inner', 0);
        if (is_null($content_element)) {
            return null;
        }

        $title = $title_element->plaintext;
        $community_link = $post->find('.story__community-link', 0);
        // adding special marker for "Maybe News" section
        // these posts are fake
        if (!is_null($community_link) && $community_link->getAttribute('href') == '/community/maybenews') {
            $title = '[' . trim($community_link->plaintext) . '] ' . $title;
        }

        $item = [];
        $item['categories'] = $this->extractCategories($post);

        $author_element = $post->find('.user__nick', 0);
        if (!is_null($author_element)) {
            $item['author'] = trim($author_element->plaintext);
        }

        $item['title'] = $title;
        // Only basic formatting tags survive in the item content.
        $item['content'] = strip_tags(
            backgroundToImg($content_element->innertext),
            '<br><p><img><a><s>'
        );
        $item['uri'] = $title_element->href;
        $item['timestamp'] = strtotime($time->getAttribute('datetime'));

        return $item;
    }

    /**
     * Rewrites the post's markup in place: drops unwanted elements, turns
     * gifx players into plain images and normalises <img> tags.
     *
     * @param object $post parsed DOM node of the post
     */
    private function cleanPostMarkup($post)
    {
        $el_to_remove_selectors = [
            '.story__read-more',
            'script',
            'svg.story-image__stretch',
        ];

        foreach ($el_to_remove_selectors as $el_to_remove_selector) {
            foreach ($post->find($el_to_remove_selector) as $el) {
                $el->outertext = '';
            }
        }

        foreach ($post->find('[data-type=gifx]') as $el) {
            $src = $el->getAttribute('data-source');
            $el->outertext = '<img src="' . $src . '">';
        }

        foreach ($post->find('img') as $img) {
            // Fall back to 'data-src' when 'src' is empty; images with neither are left alone.
            $src = $img->getAttribute('src');
            if (!$src) {
                $src = $img->getAttribute('data-src');
                if (!$src) {
                    continue;
                }
            }

            $img->outertext = '<img src="' . $src . '">';

            // it is assumed, that img's parents are links to post itself
            // we don't need them
            $img->parent()->outertext = $img->outertext;
        }
    }

    /**
     * Collects the text of every '.tags__tag' element that carries a
     * 'data-tag' attribute.
     *
     * @param object $post parsed DOM node of the post
     * @return array list of tag names
     */
    private function extractCategories($post)
    {
        $categories = [];

        foreach ($post->find('.tags__tag') as $tag) {
            if ($tag->getAttribute('data-tag')) {
                $categories[] = $tag->innertext;
            }
        }

        return $categories;
    }
}
|