items[] = [
                'title' => $title,
                'uri' => $url,
                'uid' => $url,
                'timestamp' => strtotime($lastmod),
                'categories' => $category ? [$category] : [],
                'content' => $this->fetchFullArticle($url),
            ];

            if (count($this->items) >= self::MAX_ARTICLES) {
                break;
            }
        }
    }

    /**
     * Load the page at $url and return the cleaned-up markup of its first
     * <article> element.
     *
     * Clean-up consists of blanking a fixed list of unwanted elements and
     * copying each image's `data-src` attribute into `src`.
     *
     * Failures are reported in-band rather than thrown: a placeholder message
     * is returned when the document is falsy or has no <article> element.
     *
     * @param string $url Address of the article page to load.
     *
     * @return string Inner HTML of the article, or a human-readable
     *                placeholder when it cannot be fetched or parsed.
     */
    private function fetchFullArticle(string $url): string
    {
        $html = getSimpleHTMLDOMCached($url);
        if (!$html) {
            return 'Unable to fetch article content';
        }

        $article = $html->find('article', 0);
        if (!$article) {
            return 'Unable to parse article content';
        }

        // Elements that should not appear in the feed item content.
        $removeSelectors = [
            'script',
            'style',
            'div.links-bar',
            'div.google-news-cta',
            'div.a-wrapper',
            'div.embed_youtube',
        ];

        foreach ($removeSelectors as $selector) {
            foreach ($article->find($selector) as $element) {
                // Blanking the outer text drops the element from the rendered output.
                $element->outertext = '';
            }
        }

        // Handle lazy-loaded images: the real address is in `data-src`, so
        // promote it to `src` and drop the now-redundant attribute.
        foreach ($article->find('img') as $img) {
            if (!empty($img->{'data-src'})) {
                $img->src = $img->{'data-src'};
                unset($img->{'data-src'});
            }
        }

        return $article->innertext;
    }
}