find('div[class=listArticles]', 0)->children(); foreach ($list as $element) { if ($element->tag == 'a') { $articleURL = self::URI . $element->href; $article = getSimpleHTMLDOM($articleURL); $this->rewriteAudioPlayers($article); // Reload the modified content $article = str_get_html($article->save()); $textDOM = $article->find('article', 0); // Initialise arrays $item = []; $audio = []; $picture = []; // Get the Main picture URL $picture[] = self::URI . $article->find('figure[class*=photoviewer]', 0)->find('img', 0)->src; $audioHTML = $article->find('audio'); // Add the audio element to the enclosure foreach ($audioHTML as $audioElement) { $audioURL = $audioElement->src; $audio[] = $audioURL; } // Rewrite pictures URL $imgs = $textDOM->find('img[src^="http://www.radiomelodie.com/image.php]'); foreach ($imgs as $img) { $img->src = $this->rewriteImage($img->src); $article->save(); } // Remove Google Ads $ads = $article->find('div[class=adInline]'); foreach ($ads as $ad) { $ad->outertext = ''; $article->save(); } // Extract the author $author = $article->find('div[class=author]', 0)->children(1)->children(0)->plaintext; // Handle date to timestamp $dateHTML = $article->find('div[class=author]', 0)->children(1)->plaintext; preg_match('/([a-z]{4,10}[ ]{1,2}[0-9]{1,2} [\p{L}]{3,10} [0-9]{4} à [0-9]{2}:[0-9]{2})/mus', $dateHTML, $matches); $dateText = $matches[1]; $timestamp = $this->parseDate($dateText); $item['enclosures'] = array_merge($picture, $audio); $item['author'] = $author; $item['uri'] = $articleURL; $item['title'] = $article->find('meta[property=og:title]', 0)->content; if ($timestamp !== false) { $item['timestamp'] = $timestamp; } // Remove the share article part $textDOM->find('div[class=share]', 0)->outertext = ''; // Rewrite relative Links $textDOM = defaultLinkTo($textDOM, self::URI . '/'); $article->save(); $text = $textDOM->innertext; $item['content'] = '

' . $item['title'] . '

' . $dateText . '
' . $text; $this->items[] = $item; } } } /* * Function to rewrite image URL to use the real Image URL and not the resized one (which is very slow) */ private function rewriteImage($url) { $parts = explode('?', $url); parse_str(html_entity_decode($parts[1]), $params); return self::URI . '/' . $params['image']; } /* * Function to rewrite Audio Players to use the