diff --git a/bridges/DRKBlutspendeBridge.php b/bridges/DRKBlutspendeBridge.php index 15075898..7c415d5d 100644 --- a/bridges/DRKBlutspendeBridge.php +++ b/bridges/DRKBlutspendeBridge.php @@ -37,6 +37,26 @@ class DRKBlutspendeBridge extends FeedExpander ] ]; + /** Offer titles treated as low priority: parseItem appends offers with these titles after the Fotos section and without their images, while all other offers are placed before it. */ const OFFER_LOW_PRIORITIES = [ + 'Imbiss nach der Blutspende', + 'Registrierung als Stammzellspender', + 'Typisierung möglich!', + 'Allgemeine Informationen', + 'Krankenkassen belohnen Blutspender', + 'Wer benötigt eigentlich eine Blutspende?', + 'Win-Win-Situation für die Gesundheit!', + 'Terminreservierung', + 'Du möchtest das erste Mal Blut spenden?', + 'Spende-Check', + 'Sie haben Fragen vor Ihrer Blutspende?' + ]; + + /** Title needles, matched case-insensitively with stripos, that parseItem uses to order images: an image matching a later entry sorts further back, and an image matching no entry sorts first. */ const IMAGE_PRIORITIES = [ + 'DRK', + 'Imbiss', + 'Obst', + ]; + public function collectData() { $limitItems = intval($this->getInput('limit_items')); @@ -45,37 +65,116 @@ class DRKBlutspendeBridge extends FeedExpander protected function parseItem(array $item) { - $html = getSimpleHTMLDOM($item['uri']); + $html = getSimpleHTMLDOMCached($item['uri']); $detailsElement = $html->find('.details', 0); - $dateElement = $detailsElement->find('.datum', 0); - $dateLines = self::explodeLines($dateElement->plaintext); - - $addressElement = $detailsElement->find('.adresse', 0); - $addressLines = self::explodeLines($addressElement->plaintext); + $dateLines = self::explodeLines($detailsElement->find('.datum', 0)->plaintext); + $addressLines = self::explodeLines($detailsElement->find('.adresse', 0)->plaintext); $infoElement = $detailsElement->find('.angebote > h4 + p', 0); - $info = $infoElement ? $infoElement->innertext : ''; + $info = $infoElement ? trim($infoElement->plaintext) : ''; - $imageElements = $detailsElement->find('.fotos img'); + $offers = self::parseOffers($detailsElement->find('.angebote .item')); - $item['title'] = $dateLines[0] . ' ' . $dateLines[1] . ' ' . $addressLines[0] . ' - ' . 
$addressLines[1]; + $images = self::parseImages($detailsElement->find('.fotos', 0)); + /* Ascending sort by priority: a title matching no needle keeps priority 0 and comes first; when several needles match, the last matching entry of IMAGE_PRIORITIES decides. */ usort($images, function ($imageA, $imageB): int { + list($titleA) = $imageA; + list($titleB) = $imageB; + $prioA = 0; + $prioB = 0; + foreach (self::IMAGE_PRIORITIES as $prioIndex => $prioTitleNeedle) { + if (stripos($titleA, $prioTitleNeedle) !== false) { + $prioA = $prioIndex + 1; + } + if (stripos($titleB, $prioTitleNeedle) !== false) { + $prioB = $prioIndex + 1; + } + } + return $prioA - $prioB; + }); - $item['content'] = <<{$dateLines[0]} {$dateLines[1]}

-

{$addressElement->innertext}

-

{$info}

+ $itemContent = << +

+ {$dateLines[0]} {$dateLines[1]}
+ {$addressLines[3]} +

+

+ {$addressLines[0]}
+ {$addressLines[1]}
+ {$addressLines[2]} +

+ HTML; - foreach ($imageElements as $imageElement) { - $src = $imageElement->getAttribute('src'); - $item['content'] .= <<

+ if ($info) { + $itemContent .= << +

Infos

+

{$info}

+ HTML; } + /* Offers whose title is not listed in OFFER_LOW_PRIORITIES are rendered first, each followed by its own images. */ $majorOffers = array_filter($offers, fn($title): bool => !in_array($title, self::OFFER_LOW_PRIORITIES), ARRAY_FILTER_USE_KEY); + foreach ($majorOffers as $offerTitle => list($offerText, $offerImages)) { + $itemContent .= << +

{$offerTitle}

+

{$offerText}

+ HTML; + foreach ($offerImages as list($imageTitle, $imageUrl)) { + $itemContent .= << + +
{$imageTitle}
+ + HTML; + } + $itemContent .= << + HTML; + } + + if (count($images) > 0) { + $itemContent .= << +

Fotos

+ HTML; + foreach ($images as list($imageTitle, $imageUrl)) { + $itemContent .= << + +
{$imageTitle}
+ + HTML; + } + $itemContent .= << + HTML; + } + + /* Offers listed in OFFER_LOW_PRIORITIES are appended last; only their text is used, their images are not rendered. */ $minorOffers = array_filter($offers, fn($title): bool => in_array($title, self::OFFER_LOW_PRIORITIES), ARRAY_FILTER_USE_KEY); + foreach ($minorOffers as $offerTitle => list($offerText)) { + $itemContent .= << +

{$offerTitle}

+

{$offerText}

+ + HTML; + } + + $item['title'] = $dateLines[0] . ' ' . $dateLines[1] . ' ' . $addressLines[0] . ' - ' . $addressLines[1]; + $item['content'] = $itemContent; $item['description'] = null; + /* Each enclosure is the image URL followed by a fragment built from the image title: spaces become underscores and the result is URL-encoded. */ $item['enclosures'] = array_map( + function ($image): string { + list($title, $url) = $image; + return $url . '#' . urlencode(str_replace(' ', '_', $title)); + }, + $images + ); return $item; } @@ -97,6 +196,67 @@ class DRKBlutspendeBridge extends FeedExpander return self::BASE_URI . '/blutspendetermine/termine.rss?date_to=' . $dateTo . '&radius=' . $radius . '&term=' . $term; } + /** * Collects [title, url] pairs from the a[data-lightbox] links found below $parentElement. * Links whose trimmed href is empty are skipped. A link whose trimmed title is empty is labelled Foto N, where N is the key of that link in the find() result plus one. * Returns an empty array when $parentElement is falsy. */ private function parseImages($parentElement): array + { + $images = []; + + if ($parentElement) { + $elements = $parentElement->find('a[data-lightbox]'); + foreach ($elements as $i => $element) { + $url = trim($element->getAttribute('href')); + if (!$url) { + continue; + } + + $title = trim($element->getAttribute('title')); + if (!$title) { + $number = $i + 1; + $title = "Foto {$number}"; + } + + $images[] = [$title, $url]; + } + } + + return $images; + } + + /** * Builds a map from offer title to [text, images] for the given offer elements. * The title is the cleaned plain text of the first heading matched inside the element; the text is the cleaned plain text of the whole element with the first strlen(title) bytes removed and the rest trimmed. Elements with an empty title or text are skipped, and a later element with the same title overwrites the earlier entry. * For every link with non-empty text and href, occurrences of the link text inside the offer text are replaced by the generated link markup. Images come from parseImages() on the same element. * NOTE(review): the title is cut off by length only, without checking that the text really starts with it. * NOTE(review): str_replace touches every occurrence of the link text, including markup inserted for an earlier link, and the decoded text is not escaped before it is used as HTML. */ private function parseOffers($offerElements): array + { + $offers = []; + + foreach ($offerElements as $element) { + $title = self::getCleanPlainText($element->find(':is(h1,h2,h3,h4,h5,h6)', 0)); + $text = trim(substr(self::getCleanPlainText($element), strlen($title))); + if (!$title || !$text) { + continue; + } + + $linkElements = $element->find('a'); + foreach ($linkElements as $linkElement) { + $linkText = trim($linkElement->plaintext); + $linkUrl = trim($linkElement->getAttribute('href')); + if (!$linkText || !$linkUrl) { + continue; + } + + $linkHtml = <<{$linkText} + HTML; + $text = str_replace($linkText, $linkHtml, $text); + } + + $offers[$title] = [$text, self::parseImages($element)]; + } + + return $offers; + } + + /** * Returns the plaintext of $htmlElement with HTML entities decoded, every whitespace run collapsed to a single space and both ends trimmed. * Returns an empty string when $htmlElement is falsy. */ private function getCleanPlainText($htmlElement): string + { + return $htmlElement ? 
trim(preg_replace('/\s+/', ' ', html_entity_decode($htmlElement->plaintext))) : ''; + } + /** * Returns an array of strings, each of which is a substring of string formed by splitting it on boundaries formed by line breaks. */