From 188184bdf2170cb0eba166f4d3036b85bc82d5e6 Mon Sep 17 00:00:00 2001
From: Felix Friebe <felix-friebe@gmx.de>
Date: Sun, 17 Nov 2019 00:42:08 -0600
Subject: [PATCH] Fixed MotherlessRipper

Issue #1: If the homepage of a gallery is opened (path /Gxxxx), there's no next page (path /Gxxxx?page=2). If a homepage link is added, the path is now changed to the "All Uploads" page (path /GMxxxx).
Issue #2: All paths were changed to https:// since insecure (http://) connections didn't work for a user.

Other Changes:
#1: The MotherlessRipper class used to override the rip method. Now the original method from the AbstractHTMLRipper class is used to avoid redundant code.
#2: The MotherlessRipper class now implements the getNextPage method. Getting the next page was previously done by the rip method. Also, the link to the next page is now read from a link tag in the HTML head and is no longer "calculated".
---
 .../ripper/rippers/MotherlessRipper.java      | 70 ++++++++++---------
 1 file changed, 36 insertions(+), 34 deletions(-)

diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/MotherlessRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/MotherlessRipper.java
index 9b71d756..7bb8451a 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/MotherlessRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/MotherlessRipper.java
@@ -16,8 +16,11 @@ import com.rarchives.ripme.ripper.DownloadThreadPool;
 import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
 import com.rarchives.ripme.utils.Http;
 import com.rarchives.ripme.utils.Utils;
+import org.jsoup.select.Elements;
 
 public class MotherlessRipper extends AbstractHTMLRipper {
+    // All sleep times are in milliseconds
+    private static final int IMAGE_SLEEP_TIME    = 1000;
 
     private static final String DOMAIN = "motherless.com",
                                 HOST   = "motherless";
@@ -46,7 +49,32 @@ public class MotherlessRipper extends AbstractHTMLRipper {
 
     @Override
     protected Document getFirstPage() throws IOException {
-        return Http.url(url).referrer("http://motherless.com").get();
+        URL firstURL = this.url;
+        String path = this.url.getPath();
+        // Only "All Uploads" (/GMxxxx), Image (/GIxxxx) and Video (/GVxxxx) listings paginate;
+        // a gallery homepage (/Gxxxx) has no "next" link. Paths too short to hold a type letter
+        // are fetched unchanged rather than failing with an out-of-range index.
+        boolean isHome = path.length() > 2 && "MIV".indexOf(path.charAt(2)) < 0;
+        // Redirect a homepage to its "All Uploads" listing (/Gxxxx -> /GMxxxx)
+        if (isHome) {
+            StringBuilder newPath = new StringBuilder(path);
+            newPath.insert(2, "M");
+            firstURL = new URL(this.url, "https://" + DOMAIN + newPath);
+            LOGGER.info("Changed URL to " + firstURL);
+        }
+        return Http.url(firstURL).referrer("https://motherless.com").get();
+    }
+
+    @Override
+    public Document getNextPage(Document doc) throws IOException {
+        Elements nextPageLink = doc.head().select("link[rel=next]");
+        if (nextPageLink.isEmpty()) {
+            throw new IOException("Last page reached");
+        }
+        // Elements.attr returns "" when no canonical link exists; fall back to the gallery URL
+        String canonical = doc.head().select("link[rel=canonical]").attr("href");
+        String referrerLink = canonical.isEmpty() ? this.url.toExternalForm() : canonical;
+        return Http.url(new URL(this.url, nextPageLink.first().attr("href"))).referrer(referrerLink).get();
     }
 
     @Override
@@ -64,7 +92,7 @@ public class MotherlessRipper extends AbstractHTMLRipper {
 
             String url;
             if (!thumbURL.startsWith("http")) {
-                url = "http://" + DOMAIN + thumbURL;
+                url = "https://" + DOMAIN + thumbURL;
             } else {
                 url = thumbURL;
             }
@@ -83,6 +111,11 @@ public class MotherlessRipper extends AbstractHTMLRipper {
         // Create thread for finding image at "url" page
         MotherlessImageThread mit = new MotherlessImageThread(url, index);
         motherlessThreadPool.addThread(mit);
+        try {
+            Thread.sleep(IMAGE_SLEEP_TIME);
+        } catch (InterruptedException e) {
+            LOGGER.warn("Interrupted while waiting to load next image", e);
+        }
     }
 
     @Override
@@ -112,40 +145,9 @@ public class MotherlessRipper extends AbstractHTMLRipper {
         if (m.matches()) {
             return m.group(m.groupCount());
         }
-        throw new MalformedURLException("Expected URL format: http://motherless.com/GIXXXXXXX, got: " + url);
+        throw new MalformedURLException("Expected URL format: https://motherless.com/GIXXXXXXX, got: " + url);
     }
 
-    @Override
-    public void rip() throws IOException {
-        int index = 0, page = 1;
-        String nextURL = this.url.toExternalForm();
-        while (nextURL != null) {
-            if (isStopped()) {
-                break;
-            }
-            LOGGER.info("Retrieving " + nextURL);
-            sendUpdate(STATUS.LOADING_RESOURCE, nextURL);
-            Document doc = getFirstPage();
-            List<String> URLs = getURLsFromPage(doc);
-
-            for (String url: URLs) {
-                downloadURL(new URL(url), index);
-                index ++;
-            }
-
-            if (isThisATest()) {
-                break;
-            }
-            // Next page
-            nextURL = null;
-            page++;
-            if (doc.html().contains("?page=" + page)) {
-                nextURL = this.url.toExternalForm() + "?page=" + page;
-            }
-        }
-        motherlessThreadPool.waitForThreads();
-        waitForThreads();
-    }
 
     /**
      * Helper class to find and download images found on "image" pages