From dc6928316b0740cc359b94ce83040285c76d9dcd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pierre=20Mazi=C3=A8re?= <pierre.maziere@gmx.com>
Date: Sun, 19 Jun 2016 00:41:02 +0200
Subject: [PATCH] add bridge for LWN Free Weekly Edition
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Pierre Mazière <pierre.maziere@gmx.com>
---
 bridges/LWNprevBridge.php | 156 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 156 insertions(+)
 create mode 100644 bridges/LWNprevBridge.php

diff --git a/bridges/LWNprevBridge.php b/bridges/LWNprevBridge.php
new file mode 100644
index 00000000..54fb71db
--- /dev/null
+++ b/bridges/LWNprevBridge.php
@@ -0,0 +1,156 @@
+<?php
+/**
+* RssBridgeLWNprev
+*
+* @name LWNPrev Bridge
+* @description Returns the articles from the previous LWN.net edition
+ */
+class LWNprevBridge extends BridgeAbstract{
+  // Fill in the descriptive metadata of this bridge.
+  public function loadMetadatas() {
+    $this->maintainer = 'Pierre Mazière';
+    $this->name = 'LWN Free Weekly Edition';
+    $this->uri = 'https://lwn.net/free/bigpage';
+    $this->description = 'LWN Free Weekly Edition available one week late';
+    // Date of last update, written year-month-day.
+    $this->update = '2016-01-19';
+  }
+
+  // Move $node (passed by reference) forward to the first sibling that is an
+  // element, starting with $node itself. Any non-element node is skipped, not
+  // just text: comments, for instance, have no attributes either. When no
+  // element follows, $node is left on the last sibling reached.
+  private function jumpToNextTag(&$node){
+    while($node && $node->nodeType!==XML_ELEMENT_NODE && $node->nextSibling){
+      $node=$node->nextSibling;
+    }
+  }
+
+  // Move $node (passed by reference) backward to the first sibling that is an
+  // element, starting with $node itself. Any non-element node is skipped, not
+  // just text: comments, for instance, have no attributes either. When no
+  // element precedes, $node is left on the first sibling reached.
+  private function jumpToPreviousTag(&$node){
+    while($node && $node->nodeType!==XML_ELEMENT_NODE && $node->previousSibling){
+      $node=$node->previousSibling;
+    }
+  }
+
+  /**
+   * Scrape the LWN "big page" and append one Item per article to $this->items.
+   * @param array $param request parameters; not read by this bridge
+   */
+  public function collectData(array $param){
+    // Because the LWN page is written in loose HTML and not XHTML,
+    // Simple HTML Dom is not accurate enough for the job
+    $html = advanced_file_get_contents('https://lwn.net/free/bigpage')
+      or $this->returnError('No results for LWNprev', 404);
+    // loadHTML() needs an instance: the static call is deprecated and fatal
+    // on PHP 8. libxml complaints about the loose markup are discarded.
+    libxml_use_internal_errors(true);
+    $doc=new DOMDocument();
+    $doc->loadHTML($html);
+    libxml_clear_errors();
+
+    // Text, comment and missing nodes have no class attribute: report ''.
+    $classOf=function($n){return $n instanceof DOMElement?$n->getAttribute('class'):'';};
+
+    // Categories carry over from one article to the next until replaced.
+    $cat1=$cat2='';
+    // Fragment links hang off the multi-page edition, else off the site root.
+    $realURI='https://lwn.net';
+    foreach($doc->getElementsByTagName('a') as $a){
+      if($a->textContent==='Multi-page format'){
+        $realURI.=$a->getAttribute('href');
+        break;
+      }
+    }
+    $URICounter=0;
+
+    // The edition date follows 'for ' in the page's first h1.
+    $h1=$doc->getElementsByTagName('h1')->item(0);
+    $edition=$h1?$h1->textContent:'';
+    $forPos=strpos($edition,'for ');
+    $editionTimeStamp=strtotime(
+      $forPos===false?$edition:substr($edition,$forPos+strlen('for '))
+    ) ?: time(); // unparsable heading: date the items as of now
+
+    foreach($doc->getElementsByTagName('h2') as $h2){
+      if($h2->getAttribute('class')!=='SummaryHL'){
+        continue;
+      }
+      $item = new \Item();
+
+      // Author: read from the byline element that follows the heading.
+      $h2NextSibling=$h2->nextSibling;
+      $this->jumpToNextTag($h2NextSibling);
+      $item->name='LWN';
+      switch($classOf($h2NextSibling)){
+        case 'FeatureByline':
+          $author=$h2NextSibling->getElementsByTagName('b')->item(0);
+          $item->name=$author?$author->textContent:'LWN';
+          break;
+        case 'GAByline':
+          $text=$h2NextSibling->textContent;
+          $item->name=substr($text,strpos($text,'by '));
+          break;
+      }
+
+      // Link: the heading's own anchor, else a numbered fragment of the edition.
+      $h2FirstChild=$h2->firstChild;
+      $this->jumpToNextTag($h2FirstChild);
+      if($h2FirstChild instanceof DOMElement && $h2FirstChild->tagName==='a'){
+        $item->uri='https://lwn.net'.$h2FirstChild->getAttribute('href');
+      }else{
+        $item->uri=$realURI.'#'.$URICounter;
+      }
+      $URICounter++;
+      $item->timestamp=$editionTimeStamp+$URICounter;
+
+      // Category: read from the Cat2HL/Cat1HL headings just before this one.
+      $h2PrevSibling=$h2->previousSibling;
+      $this->jumpToPreviousTag($h2PrevSibling);
+      switch($classOf($h2PrevSibling)){
+        case 'Cat2HL':
+          $cat2=$h2PrevSibling->textContent;
+          $h2PrevSibling=$h2PrevSibling->previousSibling;
+          $this->jumpToPreviousTag($h2PrevSibling);
+          if($classOf($h2PrevSibling)==='Cat1HL'){
+            $cat1=$h2PrevSibling->textContent;
+          }
+          break;
+        case 'Cat1HL':
+          $cat1=$h2PrevSibling->textContent;
+          $cat2='';
+          break;
+      }
+      $item->title='['.$cat1.($cat2?'/'.$cat2:'').'] '.$h2->textContent;
+
+      // Content: every sibling up to the next article or category heading.
+      $content='';
+      for($node=$h2->nextSibling;$node;$node=$node->nextSibling){
+        if($node instanceof DOMElement && (
+          $node->tagName==='h2' ||
+          in_array($node->getAttribute('class'),array('Cat1HL','Cat2HL'),true)
+        )){
+          break;
+        }
+        $content.=$node->C14N();
+      }
+      $item->content=$content;
+      $this->items[]=$item;
+    }
+  }
+
+  public function getName(){ // fixed display name; same literal as set in loadMetadatas()
+    return 'LWN Free Weekly Edition';
+  }
+
+  public function getURI(){ // fixed URI; the same page collectData() downloads
+    return 'https://lwn.net/free/bigpage';
+  }
+
+  public function getCacheDuration(){ // presumably seconds: 604800 = 7 * 24 * 3600
+    return 604800; // one week
+  }
+}