From 3ae7a102230471765c27eeec808c983f239a64a2 Mon Sep 17 00:00:00 2001 From: July Date: Wed, 29 Jan 2025 05:11:25 -0500 Subject: [PATCH] [GovTrackBridge] Rebase on top of official RSS feed (#4429) --- bridges/GovTrackBridge.php | 92 ++++++++++++++++---------------------- 1 file changed, 39 insertions(+), 53 deletions(-) diff --git a/bridges/GovTrackBridge.php b/bridges/GovTrackBridge.php index 4674668a..a2c18d9f 100644 --- a/bridges/GovTrackBridge.php +++ b/bridges/GovTrackBridge.php @@ -1,6 +1,6 @@ 'major-bill-activity', 'New Bills and Resolutions' => 'introduced-bills', 'New Laws' => 'enacted-bills', - 'Posts from Us' => 'posts' - ] - ], - 'limit' => self::LIMIT + 'News from Us' => 'posts' + ] + ], + 'limit' => self::LIMIT ]]; public function collectData() { - $html = getSimpleHTMLDOMCached($this->getURI()); - if ($this->getInput('feed') != 'posts') { - $this->collectEvent($html); - return; - } - - $html = defaultLinkTo($html, parent::getURI()); - $limit = $this->getInput('limit') ?? 10; - foreach ($html->find('section') as $element) { - if (--$limit == 0) { - break; - } - - $info = explode(' ', $element->find('p', 0)->innertext); - $item = [ - 'categories' => [implode(' ', array_slice($info, 4))], - 'timestamp' => strtotime(implode(' ', array_slice($info, 0, 3))), - 'title' => $element->find('a', 0)->innertext, - 'uri' => $element->find('a', 0)->href, - ]; - - $html = getSimpleHTMLDOMCached($item['uri']); - $html = defaultLinkTo($html, parent::getURI()); - - $content = $html->find('#content .col-md', 1); - $info = explode(' by ', $content->find('p', 0)->plaintext); - $content->removeChild($content->firstChild()); - - $item['author'] = implode(' ', array_slice($info, 1)); - $item['content'] = $content->innertext; - - $this->items[] = $item; + $limit = $this->getInput('limit') ?? 15; + if ($this->getInput('feed') == 'posts') { + $this->collectExpandableDatas($this->getURI() . '.rss', $limit); + } else { + $this->collectEvent($this->getURI(), $limit); } } - private function collectEvent($html) + protected function parseItem(array $item) { - $opt = []; - preg_match('/"csrfmiddlewaretoken" value="(.*)"/', $html, $opt); + $html = getSimpleHTMLDOMCached($item['uri']); + $html = defaultLinkTo($html, parent::getURI()); + + $item['categories'] = [$html->find('.breadcrumb-item', 1)->plaintext]; + $content = $html->find('#content .col-md', 1); + $item['author'] = explode(' by ', $content->firstChild()->plaintext)[1]; + $content->removeChild($content->firstChild()); + $item['content'] = $content->innertext; + + return $item; + } + + private function collectEvent($uri, $limit) + { + $html = getSimpleHTMLDOMCached($uri); + preg_match('/"csrfmiddlewaretoken" value="(.*)"/', $html, $preg); $header = [ - "cookie: csrftoken=$opt[1]", - "x-csrftoken: $opt[1]", + "cookie: csrftoken=$preg[1]", + "x-csrftoken: $preg[1]", 'referer: ' . parent::getURI(), ]; - preg_match('/var selected_feed = "(.*)";/', $html, $opt); - $post = [ - 'count' => $this->getInput('limit') ?? 20, - 'feed' => $opt[1] - ]; - $opt = [ CURLOPT_POSTFIELDS => $post ]; + preg_match('/var selected_feed = "(.*)";/', $html, $preg); + $opt = [ CURLOPT_POSTFIELDS => [ + 'count' => $limit, + 'feed' => $preg[1] + ]]; $html = getContents(parent::getURI() . 'events/_load_events', $header, $opt); $html = defaultLinkTo(str_get_html($html), parent::getURI()); @@ -83,10 +69,10 @@ class GovTrackBridge extends BridgeAbstract foreach ($html->find('.tracked_event') as $event) { $bill = $event->find('.event_title a, .event_body a', 0); $date = explode(' ', $event->find('.event_date', 0)->plaintext); - preg_match('/Sponsor:(.*)\n/', $event->plaintext, $opt); + preg_match('/Sponsor:(.*)\n/', $event->plaintext, $preg); $item = [ - 'author' => $opt[1] ?? '', + 'author' => $preg[1] ?? '', 'content' => $event->find('td', 1)->innertext, 'enclosures' => [$event->find('img', 0)->src], 'timestamp' => strtotime(implode(' ', array_slice($date, 2))), @@ -115,10 +101,10 @@ class GovTrackBridge extends BridgeAbstract public function getURI() { - if ($this->getInput('feed') != 'posts') { - $url = parent::getURI() . 'events/' . $this->getInput('feed'); - } else { + if ($this->getInput('feed') == 'posts') { $url = parent::getURI() . $this->getInput('feed'); + } else { + $url = parent::getURI() . 'events/' . $this->getInput('feed'); } return $url; }