From 4bf94d7f8656693cb49410abcf6e3fe8f8856054 Mon Sep 17 00:00:00 2001
From: Peter Szakacs
Date: Sat, 27 Oct 2018 20:49:05 +0200
Subject: [PATCH 1/7] Make TeenplanetRipper inherit from AbstractHTMLRipper,
 not AlbumRipper directly

---
 .../ripper/rippers/TeenplanetRipper.java | 87 ++++++++-----------
 1 file changed, 34 insertions(+), 53 deletions(-)

diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/TeenplanetRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/TeenplanetRipper.java
index d25ef345..9791ab90 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/TeenplanetRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/TeenplanetRipper.java
@@ -3,51 +3,66 @@ package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
-import org.jsoup.select.Elements;

-import com.rarchives.ripme.ripper.AlbumRipper;
-import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;

-public class TeenplanetRipper extends AlbumRipper {
+public class TeenplanetRipper extends AbstractHTMLRipper {

    private static final String DOMAIN = "teenplanet.org",
                                HOST   = "teenplanet";

-    private Document albumDoc = null;
-
    public TeenplanetRipper(URL url) throws IOException {
        super(url);
    }

+    @Override
+    protected String getDomain() {
+        return DOMAIN;
+    }
+
    @Override
    public String getHost() {
        return HOST;
    }

-    public URL sanitizeURL(URL url) throws MalformedURLException {
-        return url;
+    @Override
+    protected Document getFirstPage() throws IOException {
+        return Http.url(url).get();
    }

-    public String getAlbumTitle(URL url) throws MalformedURLException {
-        try {
-            // Attempt to use album title as GID
-            if (albumDoc == null) {
-                albumDoc = Http.url(url).get();
+    @Override
+    protected List<String> getURLsFromPage(Document page) {
+        List<String> imageURLs = new ArrayList<>();
+        for (Element thumb : page.select("#galleryImages > a > img")) {
+            if (!thumb.hasAttr("src")) {
+                continue;
            }
-            Elements elems = albumDoc.select("div.header > h2");
-            return HOST + "_" + elems.get(0).text();
-        } catch (Exception e) {
-            // Fall back to default album naming convention
-            e.printStackTrace();
+            String imageURL = thumb.attr("src");
+            imageURL = imageURL.replace(
+                    "/thumbs/",
+                    "/");
+            imageURLs.add(imageURL);
        }
-        return super.getAlbumTitle(url);
+        LOGGER.info("Found " + imageURLs.size() + " image urls");
+        return imageURLs;
+    }
+
+    @Override
+    protected void downloadURL(URL url, int index) {
+        String prefix = "";
+        if (Utils.getConfigBoolean("download.save_order", true)) {
+            prefix = String.format("%03d_", index);
+        }
+        addURLToDownload(url, prefix);
    }

    @Override
@@ -65,38 +80,4 @@ public class TeenplanetRipper extends AlbumRipper {
                + "teenplanet.org/galleries/....html"
                + " Got: " + url);
    }
-
-    @Override
-    public void rip() throws IOException {
-        int index = 0;
-        LOGGER.info("Retrieving " + this.url);
-        sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm());
-        if (albumDoc == null) {
-            albumDoc = Http.url(url).get();
-        }
-        for (Element thumb : albumDoc.select("#galleryImages > a > img")) {
-            if (!thumb.hasAttr("src")) {
-                continue;
-            }
-            String image = thumb.attr("src");
-            image = image.replace(
-                    "/thumbs/",
-                    "/");
-            index += 1;
-            String prefix = "";
-            if (Utils.getConfigBoolean("download.save_order", true)) {
-                prefix = String.format("%03d_", index);
-            }
-            addURLToDownload(new URL(image), prefix);
-            if (isThisATest()) {
-                break;
-            }
-        }
-        waitForThreads();
-    }
-
-    public boolean canRip(URL url) {
-        return url.getHost().endsWith(DOMAIN);
-    }
-
}
\ No newline at end of file

From 48810259159945ffc089f79ef7b22e0680668dc1 Mon Sep 17 00:00:00 2001
From: Peter Szakacs
Date: Sat, 27 Oct 2018 22:13:01 +0200
Subject: [PATCH 2/7] Make NewsfilterRipper inherit from AbstractHTMLRipper

Also add a basic unit test for this ripper (it covers everything except
getGID() and getAlbumTitle()).
---
 .../ripper/rippers/NewsfilterRipper.java | 76 ++++++++++---------
 .../ripper/rippers/NewsfilterRipperTest.java | 6 +-
 2 files changed, 44 insertions(+), 38 deletions(-)

diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/NewsfilterRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/NewsfilterRipper.java
index 6454c508..eee733db 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/NewsfilterRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/NewsfilterRipper.java
@@ -3,9 +3,13 @@ package com.rarchives.ripme.ripper.rippers;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.utils.Http;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
@@ -14,21 +18,15 @@ import org.jsoup.select.Elements;

import com.rarchives.ripme.ripper.AlbumRipper;

-public class NewsfilterRipper extends AlbumRipper {
+public class NewsfilterRipper extends AbstractHTMLRipper {
+
+    private static final String HOST = "newsfilter";
+    private static final String DOMAIN = "newsfilter.org";

    public NewsfilterRipper(URL url) throws IOException {
        super(url);
    }

-    @Override
-    public boolean canRip(URL url) {
-        //http://newsfilter.org/gallery/he-doubted-she-would-fuck-on-cam-happy-to-be-proven-wrong-216799
-        Pattern p = Pattern.compile("^https?://([wm]+\\.)?newsfilter\\.org/gallery/.+$");
-        Matcher m = p.matcher(url.toExternalForm());
-        return m.matches();
-    }
-
    @Override
    public URL sanitizeURL(URL url) throws MalformedURLException {
        String u = url.toExternalForm();
@@ -40,27 +38,15 @@ public class NewsfilterRipper extends AlbumRipper {
    }

    @Override
-    public void rip() throws IOException {
-        String gid = getGID(this.url);
-        String theurl = "http://newsfilter.org/gallery/" + gid;
-        LOGGER.info("Loading " + theurl);
-
-        Connection.Response resp = Jsoup.connect(theurl)
-                .timeout(5000)
-                .referrer("")
-                .userAgent(USER_AGENT)
-                .method(Connection.Method.GET)
-                .execute();
-        Document doc = resp.parse();
-
-        Elements thumbnails = doc.select("#galleryImages .inner-block img");
-        for (Element thumb : thumbnails) {
-            String thumbUrl = thumb.attr("src");
-            String picUrl = thumbUrl.replace("thumbs/", "");
-            addURLToDownload(new URL(picUrl));
+    public String getGID(URL url) throws MalformedURLException {
+        Pattern p = Pattern.compile("^https?://([wm]+\\.)?newsfilter\\.org/gallery/([^/]+)$");
+        Matcher m = p.matcher(url.toExternalForm());
+        if (m.matches()) {
+            return m.group(2);
        }
-
-        waitForThreads();
+        throw new MalformedURLException(
+                "Expected newsfilter gallery format: http://newsfilter.org/gallery/galleryid" +
+                " Got: " + url);
    }

    @Override
@@ -69,14 +55,30 @@ public class NewsfilterRipper extends AlbumRipper {
    }

    @Override
-    public String getGID(URL url) throws MalformedURLException {
-        Pattern p = Pattern.compile("^https?://([wm]+\\.)?newsfilter\\.org/gallery/([^/]+)$");
-        Matcher m = p.matcher(url.toExternalForm());
-        if (m.matches()) {
-            return m.group(2);
+    protected String getDomain() {
+        return DOMAIN;
+    }
+
+    @Override
+    protected Document getFirstPage() throws IOException {
+        return Http.url(url).get();
+    }
+
+    @Override
+    protected List<String> getURLsFromPage(Document page) {
+        List<String> imgURLs = new ArrayList<>();
+        Elements thumbnails = page.select("#galleryImages .inner-block img");
+        for (Element thumb : thumbnails) {
+            String thumbUrl = thumb.attr("src");
+            String picUrl = thumbUrl.replace("thumbs/", "");
+            // use HTTP instead of HTTPS (less headaches)
+            imgURLs.add(picUrl.replaceFirst("https://", "http://"));
        }
-        throw new MalformedURLException(
-                "Expected newsfilter gallery format: http://newsfilter.org/gallery/galleryid" +
-                " Got: " + url);
+        return imgURLs;
+    }
+
+    @Override
+    protected void downloadURL(URL url, int index) {
+        addURLToDownload(url, getPrefix(index));
    }
}

diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/NewsfilterRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/NewsfilterRipperTest.java
index 4a5b55aa..c22ba9c5 100644
--- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/NewsfilterRipperTest.java
+++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/NewsfilterRipperTest.java
@@ -6,5 +6,9 @@ import java.net.URL;
import com.rarchives.ripme.ripper.rippers.NewsfilterRipper;

public class NewsfilterRipperTest extends RippersTest {
-    // TODO add a test
+
+    public void testNewsfilterRip() throws IOException {
+        NewsfilterRipper ripper = new NewsfilterRipper(new URL("http://newsfilter.org/gallery/he-doubted-she-would-fuck-on-cam-happy-to-be-proven-wrong-216799"));
+        testRipper(ripper);
+    }
}
\ No newline at end of file

From 5ae2bb43e80d404d85ea35cabc509a5611d1b7aa Mon Sep 17 00:00:00 2001
From: Peter Szakacs
Date: Mon, 29 Oct 2018 17:05:31 +0100
Subject: [PATCH 3/7] Make NfsfwRipper inherit from AbstractHTMLRipper

Also give it queue support, since some galleries contain only subalbum
links. For galleries with both images and subalbums, such as the one in
the unit test, first rip the images in the base album and then the
images in the subalbums (saving those to a subdirectory of the current
album directory).

Note that nfsfw.com showed some read timeouts when ripping the same
albums that it ripped OK before. Adding a delay before loading the next
page in getNextPage() seems to help somewhat, but if any issues are
encountered, for now the simplest fix seems to be to wait a while
before trying to rip the album again.
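For illustration, here is a minimal, self-contained sketch of how the
refactored ripper tells a subalbum-only gallery apart from one that
contains images (the demo class and HTML fragment are made up for this
note; the two selectors and the isEmpty() checks mirror
getImagePageURLs(), getSubalbumURLs() and pageContainsAlbums() in the
diff below):

    import java.util.ArrayList;
    import java.util.List;

    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;
    import org.jsoup.nodes.Element;

    public class NfsfwSelectorDemo {
        public static void main(String[] args) {
            // A fabricated gallery page with one subalbum cell and no
            // image cells, i.e. the "only subalbum links" case above.
            Document page = Jsoup.parse(
                    "<table><tr><td class=\"IMG\">"
                    + "<a href=\"/gallery/v/Kitten/gif_001/\">sub</a>"
                    + "</td></tr></table>");
            List<String> imagePages = new ArrayList<>();
            for (Element thumb : page.select("td.giItemCell > div > a")) {
                imagePages.add("http://nfsfw.com" + thumb.attr("href"));
            }
            List<String> subalbums = new ArrayList<>();
            for (Element suba : page.select("td.IMG > a")) {
                subalbums.add("http://nfsfw.com" + suba.attr("href"));
            }
            // Prints "true": no images and at least one subalbum, so the
            // page gets queued as albums instead of being ripped directly.
            System.out.println(imagePages.isEmpty() && !subalbums.isEmpty());
        }
    }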
---
 .../ripme/ripper/rippers/NfsfwRipper.java | 232 ++++++++++--------
 .../tst/ripper/rippers/NfsfwRipperTest.java | 18 +-
 2 files changed, 149 insertions(+), 101 deletions(-)

diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/NfsfwRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/NfsfwRipper.java
index 3585b6bb..b525a39a 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/NfsfwRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/NfsfwRipper.java
@@ -8,6 +8,7 @@ import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
@@ -18,13 +19,22 @@ import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;

-public class NfsfwRipper extends AlbumRipper {
+public class NfsfwRipper extends AbstractHTMLRipper {

    private static final String DOMAIN = "nfsfw.com",
                                HOST   = "nfsfw";

-    private Document albumDoc = null;
+    private int index = 0;
+    private String currentDir = "";
+    private List<String> subalbumURLs = new ArrayList<>();
+    private Pattern subalbumURLPattern = Pattern.compile(
+            "https?://[wm.]*nfsfw.com/gallery/v/[^/]+/(.+)$"
+    );
+
+    // cached first page
+    private Document fstPage;
+
    // threads pool for downloading images from image pages
    private DownloadThreadPool nfsfwThreadPool;

    public NfsfwRipper(URL url) throws IOException {
@@ -32,39 +42,104 @@ public class NfsfwRipper extends AlbumRipper {
        nfsfwThreadPool = new DownloadThreadPool("NFSFW");
    }

+    @Override
+    protected String getDomain() {
+        return DOMAIN;
+    }
+
    @Override
    public String getHost() {
        return HOST;
    }

    @Override
-    public URL sanitizeURL(URL url) throws MalformedURLException {
-        return url;
+    protected Document getFirstPage() throws IOException {
+        // cache the first page
+        this.fstPage = Http.url(url).get();
+        return fstPage;
    }

    @Override
-    public String getAlbumTitle(URL url) throws MalformedURLException {
-        try {
-            // Attempt to use album title as GID
-            if (albumDoc == null) {
-                albumDoc = Http.url(url).get();
+    public Document getNextPage(Document page) throws IOException {
+        String nextURL = null;
+        Elements a = page.select("a.next");
+        if (!a.isEmpty()) {
+            // Get next page of current album
+            nextURL = "http://nfsfw.com" + a.first().attr("href");
+        } else if (!subalbumURLs.isEmpty()) {
+            // Get next sub-album
+            nextURL = subalbumURLs.remove(0);
+            LOGGER.info("Detected subalbum URL at: " + nextURL);
+            Matcher m = subalbumURLPattern.matcher(nextURL);
+            if (m.matches()) {
+                // Set the new save directory and save images with a new index
+                this.currentDir = m.group(1);
+                this.index = 0;
+            } else {
+                LOGGER.error("Invalid sub-album URL: " + nextURL);
+                nextURL = null;
            }
-            String title = albumDoc.select("h2").first().text().trim();
-            return "nfsfw_" + Utils.filesystemSafe(title);
-        } catch (Exception e) {
-            // Fall back to default album naming convention
        }
-        return super.getAlbumTitle(url);
+        // Wait
+        try {
+            Thread.sleep(2000);
+        } catch (InterruptedException e) {
+            LOGGER.error("Interrupted while waiting to load next page", e);
+        }
+        if (nextURL == null) {
+            throw new IOException("No more pages");
+        } else {
+            return Http.url(nextURL).get();
+        }
+    }
+
+    @Override
+    protected List<String> getURLsFromPage(Document page) {
+        List<String> imagePageURLs = getImagePageURLs(page);
+
+        // Check if any sub-albums are present on this page
+        List<String> subalbumURLs = getSubalbumURLs(page);
+        this.subalbumURLs.addAll(subalbumURLs);
+
+        return imagePageURLs;
+    }
+
+    @Override
+    protected void downloadURL(URL url, int index) {
+        // if we are now downloading a sub-album, all images in it
+        // should be indexed starting from 0
+        if (!this.currentDir.equals("")) {
+            index = ++this.index;
+        }
+        NfsfwImageThread t = new NfsfwImageThread(url, currentDir, index);
+        nfsfwThreadPool.addThread(t);
+    }
+
+    @Override
+    public URL sanitizeURL(URL url) throws MalformedURLException {
+        // always start on the first page of an album
+        // (strip the options after the '?')
+        String u = url.toExternalForm();
+        if (u.contains("?")) {
+            u = u.substring(0, u.indexOf("?"));
+            return new URL(u);
+        } else {
+            return url;
+        }
    }

    @Override
    public String getGID(URL url) throws MalformedURLException {
        Pattern p;
        Matcher m;

-        p = Pattern.compile("https?://[wm.]*nfsfw.com/gallery/v/([a-zA-Z0-9\\-_]+).*");
+        p = Pattern.compile("https?://[wm.]*nfsfw.com/gallery/v/(.*)$");
        m = p.matcher(url.toExternalForm());
        if (m.matches()) {
-            return m.group(1);
+            String group = m.group(1);
+            if (group.endsWith("/")) {
+                group = group.substring(0, group.length() - 1);
+            }
+            return group.replaceAll("/", "__");
        }

        throw new MalformedURLException(
@@ -74,75 +149,51 @@ public class NfsfwRipper extends AlbumRipper {
    }

    @Override
-    public void rip() throws IOException {
-        List<Pair> subAlbums = new ArrayList<>();
-        int index = 0;
-        subAlbums.add(new Pair(this.url.toExternalForm(), ""));
-        while (!subAlbums.isEmpty()) {
-            if (isStopped()) {
-                break;
-            }
-            Pair nextAlbum = subAlbums.remove(0);
-            String nextURL = nextAlbum.first;
-            String nextSubalbum = nextAlbum.second;
-            sendUpdate(STATUS.LOADING_RESOURCE, nextURL);
-            LOGGER.info(" Retrieving " + nextURL);
-            if (albumDoc == null) {
-                albumDoc = Http.url(nextURL).get();
-            }
-            // Subalbums
-            for (Element suba : albumDoc.select("td.IMG > a")) {
-                if (isStopped() || isThisATest()) {
-                    break;
-                }
-                String subURL = "http://nfsfw.com" + suba.attr("href");
-                String subdir = subURL;
-                while (subdir.endsWith("/")) {
-                    subdir = subdir.substring(0, subdir.length() - 1);
-                }
-                subdir = subdir.substring(subdir.lastIndexOf("/") + 1);
-                subAlbums.add(new Pair(subURL, subdir));
-            }
-            // Images
-            for (Element thumb : albumDoc.select("td.giItemCell > div > a")) {
-                if (isStopped()) {
-                    break;
-                }
-                String imagePage = "http://nfsfw.com" + thumb.attr("href");
-                try {
-                    NfsfwImageThread t = new NfsfwImageThread(new URL(imagePage), nextSubalbum, ++index);
-                    nfsfwThreadPool.addThread(t);
-                    if (isThisATest()) {
-                        break;
-                    }
-                } catch (MalformedURLException mue) {
-                    LOGGER.warn("Invalid URL: " + imagePage);
-                }
-            }
-            if (isThisATest()) {
-                break;
-            }
-            // Get next page
-            for (Element a : albumDoc.select("a.next")) {
-                subAlbums.add(0, new Pair("http://nfsfw.com" + a.attr("href"), ""));
-                break;
-            }
-            // Insert next page at the top
-            albumDoc = null;
-            // Wait
-            try {
-                Thread.sleep(1000);
-            } catch (InterruptedException e) {
-                LOGGER.error("Interrupted while waiting to load next page", e);
-                throw new IOException(e);
-            }
-        }
-        nfsfwThreadPool.waitForThreads();
-        waitForThreads();
+    public DownloadThreadPool getThreadPool() {
+        return nfsfwThreadPool;
    }

-    public boolean canRip(URL url) {
-        return url.getHost().endsWith(DOMAIN);
+    @Override
+    public boolean hasQueueSupport() {
+        return true;
+    }
+
+    @Override
+    public boolean pageContainsAlbums(URL url) {
+        List<String> imageURLs = getImagePageURLs(fstPage);
+        List<String> subalbumURLs = getSubalbumURLs(fstPage);
+        return imageURLs.isEmpty() && !subalbumURLs.isEmpty();
+    }
+
+    @Override
+    public List<String> getAlbumsToQueue(Document doc) {
+        return getSubalbumURLs(doc);
+    }
+
+    // helper methods
+
+    private List<String> getImagePageURLs(Document page) {
+        // get image pages
+        // NOTE: It might be possible to get the (non-thumbnail) image URL
+        // without going to its page first as there seems to be a pattern
+        // between the thumb and actual image URLs, but that is outside the
+        // scope of the current issue being solved.
+        List<String> imagePageURLs = new ArrayList<>();
+        for (Element thumb : page.select("td.giItemCell > div > a")) {
+            String imagePage = "http://nfsfw.com" + thumb.attr("href");
+            imagePageURLs.add(imagePage);
+        }
+        return imagePageURLs;
+    }
+
+    private List<String> getSubalbumURLs(Document page) {
+        // Check if sub-albums are present on this page
+        List<String> subalbumURLs = new ArrayList<>();
+        for (Element suba : page.select("td.IMG > a")) {
+            String subURL = "http://nfsfw.com" + suba.attr("href");
+            subalbumURLs.add(subURL);
+        }
+        return subalbumURLs;
    }

    /**
@@ -175,23 +226,10 @@ public class NfsfwRipper extends AlbumRipper {
                if (file.startsWith("/")) {
                    file = "http://nfsfw.com" + file;
                }
-                String prefix = "";
-                if (Utils.getConfigBoolean("download.save_order", true)) {
-                    prefix = String.format("%03d_", index);
-                }
-                addURLToDownload(new URL(file), prefix, this.subdir);
+                addURLToDownload(new URL(file), getPrefix(index), this.subdir);
            } catch (IOException e) {
                LOGGER.error("[!] Exception while loading/parsing " + this.url, e);
            }
        }
    }
-
-    private class Pair {
-        String first;
-        String second;
-        Pair(String first, String second) {
-            this.first = first;
-            this.second = second;
-        }
-    }
}
\ No newline at end of file

diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/NfsfwRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/NfsfwRipperTest.java
index 3f1ba6cc..7f85fa5f 100644
--- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/NfsfwRipperTest.java
+++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/NfsfwRipperTest.java
@@ -6,11 +6,21 @@ import java.net.URL;
import com.rarchives.ripme.ripper.rippers.NfsfwRipper;

public class NfsfwRipperTest extends RippersTest {
-    /*
-    // https://github.com/RipMeApp/ripme/issues/291 -- nfsfw "account suspended" error; disabled flaky test in CI
-    public void testNfsfwRip() throws IOException {
+    // https://github.com/RipMeApp/ripme/issues/291 -- nfsfw "account suspended" error; disabled flaky test in CI
+    /*public void testNfsfwRip() throws IOException {
        NfsfwRipper ripper = new NfsfwRipper(new URL("http://nfsfw.com/gallery/v/Kitten/"));
        testRipper(ripper);
+    }*/
+
+    public void testGetGID() throws IOException {
+        URL url = new URL("http://nfsfw.com/gallery/v/Kitten/");
+        NfsfwRipper ripper = new NfsfwRipper(url);
+        assertEquals("Kitten", ripper.getGID(url));
+        url = new URL("http://nfsfw.com/gallery/v/Kitten");
+        assertEquals("Kitten", ripper.getGID(url));
+        url = new URL("http://nfsfw.com/gallery/v/Kitten/gif_001/");
+        assertEquals("Kitten__gif_001", ripper.getGID(url));
+        url = new URL("http://nfsfw.com/gallery/v/Kitten/gif_001");
+        assertEquals("Kitten__gif_001", ripper.getGID(url));
    }
-    */
}

From da509663d186889f8b7dd625b9936fe3fc3a542f Mon Sep 17 00:00:00 2001
From: Peter Szakacs
Date: Mon, 29 Oct 2018 21:03:25 +0100
Subject: [PATCH 4/7] Make PornhubRipper inherit from AbstractHTMLRipper

Also make sure that, if an album has multiple pages, all of its pages
are downloaded (tested).
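As a reference for reviewers, here is a minimal offline sketch of the
next-page detection this relies on (the demo class and HTML snippet are
fabricated; the li.page_next > a selector and the relative-URL
resolution match getNextPage() in the diff below):

    import java.net.MalformedURLException;
    import java.net.URL;

    import org.jsoup.Jsoup;
    import org.jsoup.nodes.Document;
    import org.jsoup.select.Elements;

    public class PornhubNextPageDemo {
        public static void main(String[] args) throws MalformedURLException {
            String albumURL = "https://www.pornhub.com/album/15680522";
            // Fabricated pagination markup as it might appear on page 1.
            Document page = Jsoup.parse(
                    "<ul><li class=\"page_next\">"
                    + "<a href=\"/album/15680522?page=2\">Next</a></li></ul>",
                    albumURL);
            Elements nextPageLink = page.select("li.page_next > a");
            if (nextPageLink.isEmpty()) {
                System.out.println("No more pages");
            } else {
                // Resolve the relative href against the album URL, just as
                // the ripper does with new URL(this.url, href).
                URL nextURL = new URL(new URL(albumURL),
                        nextPageLink.first().attr("href"));
                System.out.println(nextURL); // .../album/15680522?page=2
            }
        }
    }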
---
 .../ripme/ripper/rippers/PornhubRipper.java | 124 +++++++++---------
 .../tst/ripper/rippers/PornhubRipperTest.java | 24 +++-
 2 files changed, 84 insertions(+), 64 deletions(-)

diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/PornhubRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/PornhubRipper.java
index bffd0f2d..eb7a421b 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/PornhubRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/PornhubRipper.java
@@ -4,9 +4,12 @@ import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
@@ -17,7 +20,7 @@ import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;

-public class PornhubRipper extends AlbumRipper {
+public class PornhubRipper extends AbstractHTMLRipper {
    // All sleep times are in milliseconds
    private static final int IMAGE_SLEEP_TIME = 1000;

@@ -26,9 +29,6 @@ public class PornhubRipper extends AlbumRipper {
    // Thread pool for finding direct image links from "image" pages (html)
    private DownloadThreadPool pornhubThreadPool = new DownloadThreadPool("pornhub");

-    // Current HTML document
-    private Document albumDoc = null;
-
    public PornhubRipper(URL url) throws IOException {
        super(url);
    }
@@ -38,25 +38,63 @@
        return HOST;
    }

-    public URL sanitizeURL(URL url) throws MalformedURLException {
-        return url;
+    @Override
+    protected String getDomain() {
+        return DOMAIN;
    }

-    public String getAlbumTitle(URL url) throws MalformedURLException {
-        try {
-            // Attempt to use album title as GID
-            if (albumDoc == null) {
-                LOGGER.info(" Retrieving " + url.toExternalForm());
-                sendUpdate(STATUS.LOADING_RESOURCE, url.toString());
-                albumDoc = Http.url(url).get();
-            }
-            Elements elems = albumDoc.select(".photoAlbumTitleV2");
-            return HOST + "_" + elems.get(0).text();
-        } catch (Exception e) {
-            // Fall back to default album naming convention
-            LOGGER.warn("Failed to get album title from " + url, e);
+    @Override
+    protected Document getFirstPage() throws IOException {
+        return Http.url(url).referrer(url).get();
+    }
+
+    @Override
+    public Document getNextPage(Document page) throws IOException {
+        Elements nextPageLink = page.select("li.page_next > a");
+        if (nextPageLink.isEmpty()) {
+            throw new IOException("No more pages");
+        } else {
+            URL nextURL = new URL(this.url, nextPageLink.first().attr("href"));
+            return Http.url(nextURL).get();
+        }
+    }
+
+    @Override
+    protected List<String> getURLsFromPage(Document page) {
+        List<String> pageURLs = new ArrayList<>();
+        // Find thumbnails
+        Elements thumbs = page.select(".photoBlockBox li");
+        // Iterate over thumbnail images on page
+        for (Element thumb : thumbs) {
+            String imagePage = thumb.select(".photoAlbumListBlock > a")
+                    .first().attr("href");
+            String fullURL = "https://pornhub.com" + imagePage;
+            pageURLs.add(fullURL);
+        }
+        return pageURLs;
+    }
+
+    @Override
+    protected void downloadURL(URL url, int index) {
+        PornhubImageThread t = new PornhubImageThread(url, index, this.workingDir);
+        pornhubThreadPool.addThread(t);
+        try {
+            Thread.sleep(IMAGE_SLEEP_TIME);
+        } catch (InterruptedException e) {
+            LOGGER.warn("Interrupted while waiting to load next image", e);
+        }
+    }
+
+    public URL sanitizeURL(URL url) throws MalformedURLException {
+        // always start on the first page of an album
+        // (strip the options after the '?')
+        String u = url.toExternalForm();
+        if (u.contains("?")) {
+            u = u.substring(0, u.indexOf("?"));
+            return new URL(u);
+        } else {
+            return url;
        }
-        return super.getAlbumTitle(url);
    }

    @Override
@@ -64,7 +102,7 @@
        Pattern p;
        Matcher m;

-        p = Pattern.compile("^.*pornhub\\.com/album/([0-9]+)$");
+        p = Pattern.compile("^.*pornhub\\.com/album/([0-9]+).*$");
        m = p.matcher(url.toExternalForm());
        if (m.matches()) {
            return m.group(1);
@@ -77,48 +115,8 @@
    }

    @Override
-    public void rip() throws IOException {
-        int index = 0;
-        String nextUrl = this.url.toExternalForm();
-
-        if (albumDoc == null) {
-            LOGGER.info(" Retrieving album page " + nextUrl);
-            sendUpdate(STATUS.LOADING_RESOURCE, nextUrl);
-            albumDoc = Http.url(nextUrl)
-                    .referrer(this.url)
-                    .get();
-        }
-
-        // Find thumbnails
-        Elements thumbs = albumDoc.select(".photoBlockBox li");
-        if (thumbs.isEmpty()) {
-            LOGGER.debug("albumDoc: " + albumDoc);
-            LOGGER.debug("No images found at " + nextUrl);
-            return;
-        }
-
-        // Iterate over images on page
-        for (Element thumb : thumbs) {
-            if (isStopped()) {
-                break;
-            }
-            index++;
-            String imagePageUrl = thumb.select(".photoAlbumListBlock > a").first().attr("href");
-            URL imagePage = new URL(url, imagePageUrl);
-            PornhubImageThread t = new PornhubImageThread(imagePage, index, this.workingDir);
-            pornhubThreadPool.addThread(t);
-            if (isThisATest()) {
-                break;
-            }
-            try {
-                Thread.sleep(IMAGE_SLEEP_TIME);
-            } catch (InterruptedException e) {
-                LOGGER.warn("Interrupted while waiting to load next image", e);
-            }
-        }
-
-        pornhubThreadPool.waitForThreads();
-        waitForThreads();
+    public DownloadThreadPool getThreadPool() {
+        return pornhubThreadPool;
    }

    public boolean canRip(URL url) {

diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/PornhubRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/PornhubRipperTest.java
index 74bee8d9..278ad97c 100644
--- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/PornhubRipperTest.java
+++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/PornhubRipperTest.java
@@ -4,6 +4,8 @@ import java.io.IOException;
import java.net.URL;

import com.rarchives.ripme.ripper.rippers.PornhubRipper;
+import com.rarchives.ripme.utils.Http;
+import org.jsoup.nodes.Document;

public class PornhubRipperTest extends RippersTest {
    public void testPornhubRip() throws IOException {
@@ -12,8 +14,28 @@
    }

    public void testGetGID() throws IOException {
-        URL url = new URL("https://www.pornhub.com/album/15680522");
+        URL url = new URL("https://www.pornhub.com/album/15680522?page=2");
        PornhubRipper ripper = new PornhubRipper(url);
        assertEquals("15680522", ripper.getGID(url));
+        url = new URL("https://www.pornhub.com/album/15680522");
+        assertEquals("15680522", ripper.getGID(url));
+    }
+
+    // alternate album, with only 2 pages: https://www.pornhub.com/album/4771891
+    public void testGetNextPage() throws IOException {
+        String baseURL = "https://www.pornhub.com/album/15680522";
+        PornhubRipper ripper = new PornhubRipper(new URL(baseURL));
+        Document page = Http.url(baseURL).get();
+        int numPagesRemaining = 4;
+        for (int idx = 0; idx < numPagesRemaining; idx++) {
+            page = ripper.getNextPage(page);
+            assertEquals(baseURL + "?page=" + (idx + 2), page.location());
+        }
+        try {
+            page = ripper.getNextPage(page);
+            fail("Get next page did not throw an exception on the last page");
+        } catch (IOException e) {
+            assertEquals("No more pages", e.getMessage());
+        }
    }
}

From 3a8b87578f1be427d284197a7715751b30787354 Mon Sep 17 00:00:00 2001
From: Peter Szakacs
Date: Wed, 31 Oct 2018 13:30:46 +0100
Subject: [PATCH 5/7] Make PhotobucketRipper inherit AbstractHTMLRipper

Also change the API call used to check if an album has subalbums; the
new API call returns JSON with more metadata. The style is deliberately
similar to NfsfwRipper after refactoring (commit:
5ae2bb43e80d404d85ea35cabc509a5611d1b7aa).

Also, change the waiting time between fetching pages from 1 second to 2
seconds, as Photobucket seems to have fewer read timeouts with this
value (though that might just be a fluke of accessing it on mobile
networks).
---
 .../ripper/rippers/PhotobucketRipper.java | 371 +++++++++++-------
 .../ripper/rippers/PhotobucketRipperTest.java | 48 ++-
 2 files changed, 268 insertions(+), 151 deletions(-)

diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/PhotobucketRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/PhotobucketRipper.java
index ad0159b3..4906f824 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/PhotobucketRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/PhotobucketRipper.java
@@ -10,23 +10,63 @@ import java.util.regex.Pattern;

import org.json.JSONArray;
import org.json.JSONObject;
-import org.jsoup.Connection.Response;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
-import org.jsoup.select.Elements;

-import com.rarchives.ripme.ripper.AlbumRipper;
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;

-public class PhotobucketRipper extends AlbumRipper {
+public class PhotobucketRipper extends AbstractHTMLRipper {

    private static final String DOMAIN = "photobucket.com",
                                HOST   = "photobucket";
+    private static final int ITEMS_PER_PAGE = 24;
+    private static final int WAIT_BEFORE_NEXT_PAGE = 2000;

-    private Response pageResponse = null;
+    private final class AlbumMetadata {
+        private final String url;
+        private final String location;
+        private final int sortOrder;
+        private int currPage = 1;
+        private int numPages;
+
+        private AlbumMetadata(JSONObject data) {
+            this.url = data.getString("url");
+            this.location = data.getString("location")
+                    .replace(" ", "_");
+            this.sortOrder = data.getInt("sortOrder");
+        }
+
+        private String getCurrPageURL() {
+            return url + String.format("?sort=%d&page=%d",
+                    sortOrder, currPage);
+        }
+    }
+
+    private final Pattern collDataPattern;
+    private final Pattern pbURLPattern;
+
+    // all albums including sub-albums to rip
+    private List<AlbumMetadata> albums;
+    // the album currently being ripped
+    private AlbumMetadata currAlbum;
+    // a new index per album downloaded
+    private int index = 0;

    public PhotobucketRipper(URL url) throws IOException {
        super(url);
+        this.collDataPattern = Pattern.compile(
+                "^.*collectionData: (\\{.*}).*$", Pattern.DOTALL
+        );
+        this.pbURLPattern = Pattern.compile(
+                "^https?://([a-zA-Z0-9]+)\\.photobucket\\.com/user/" +
+                "([a-zA-Z0-9_\\-]+)/library/([^?]*).*$"
+        );
+    }
+
+    @Override
+    protected String getDomain() {
+        return DOMAIN;
    }

    @Override
@@ -34,45 +74,35 @@
        return HOST;
    }

+    @Override
    public URL sanitizeURL(URL url) throws MalformedURLException {
        LOGGER.info(url);
        String u = url.toExternalForm();
        if (u.contains("?")) {
+            // strip options from URL
            u = u.substring(0, u.indexOf("?"));
-            return new URL(u);
        }
-        else {
-            return url;
+        if (!u.endsWith("/")) {
+            // append trailing slash
+            u = u + "/";
        }
-    }
-
-    public String getAlbumTitle(URL url) throws MalformedURLException {
-        try {
-            // Attempt to use album title as GID
-            if (pageResponse == null) {
-                pageResponse = Http.url(url).response();
-            }
-            Document albumDoc = pageResponse.parse();
-            Elements els = albumDoc.select("div.libraryTitle > h1");
-            if (els.isEmpty()) {
-                throw new IOException("Could not find libraryTitle at " + url);
-            }
-            return els.get(0).text();
-        } catch (IOException e) {
-            // Fall back to default album naming convention
-        }
-        return super.getAlbumTitle(url);
+        return new URL(u);
    }

    @Override
    public String getGID(URL url) throws MalformedURLException {
-        Pattern p; Matcher m;
+        Matcher m;
+
+        URL sanitized = sanitizeURL(url);
        // http://s844.photobucket.com/user/SpazzySpizzy/library/Lady%20Gaga?sort=3&page=1
-        p = Pattern.compile("^https?://[a-zA-Z0-9]+\\.photobucket\\.com/user/([a-zA-Z0-9_\\-]+)/library.*$");
-        m = p.matcher(url.toExternalForm());
+        m = pbURLPattern.matcher(sanitized.toExternalForm());
        if (m.matches()) {
-            return m.group(1);
+            // the username is not really a unique GID, because the same user
+            // can have multiple albums, but on the other hand, using HOST_GID
+            // as save directory means we can group ripped albums of the same
+            // user.
+            return m.group(2);
        }

        throw new MalformedURLException(
@@ -81,134 +111,177 @@
                + " Got: " + url);
    }

+
+
+    // Page iteration
+
+
+
    @Override
-    public void rip() throws IOException {
-        List<String> subalbums = ripAlbumAndGetSubalbums(this.url.toExternalForm());
-
-        List<String> subsToRip = new ArrayList<>(),
-                     rippedSubs = new ArrayList<>();
-
-        for (String sub : subalbums) {
-            subsToRip.add(sub);
+    protected Document getFirstPage() throws IOException {
+        if (this.currAlbum == null) {
+            this.albums = getAlbumMetadata(this.url.toExternalForm());
+            LOGGER.info("Detected " + albums.size() + " albums in total");
        }
-
-        while (!subsToRip.isEmpty() && !isStopped()) {
-            try {
-                Thread.sleep(1000);
-            } catch (InterruptedException e) {
-                break;
-            }
-            String nextSub = subsToRip.remove(0);
-            rippedSubs.add(nextSub);
-            LOGGER.info("Attempting to rip next subalbum: " + nextSub);
-            try {
-                pageResponse = null;
-                subalbums = ripAlbumAndGetSubalbums(nextSub);
-            } catch (IOException e) {
-                LOGGER.error("Error while ripping " + nextSub, e);
-                break;
-            }
-            for (String subalbum : subalbums) {
-                if (!subsToRip.contains(subalbum) && !rippedSubs.contains(subalbum)) {
-                    subsToRip.add(subalbum);
-                }
-            }
-        }
-        waitForThreads();
+        this.currAlbum = this.albums.remove(0);
+        // NOTE: Why not just get media count in the metadata json?
+        //
+        // Because that data might not reflect what the user sees on the page
+        // and can lead to iterating more pages than there actually are.
+        //
+        // An example:
+        // Metadata JSON -> AlbumStats: 146 images + 0 videos -> 146 items/7 pages
+        // http://s1255.photobucket.com/api/user/mimajki/album/Movie%20gifs/get?subAlbums=48&json=1
+        // Actual item count when looking at the album url: 131 items/6 pages
+        // http://s1255.photobucket.com/user/mimajki/library/Movie%20gifs?sort=6&page=1
+        Document page = Http.url(currAlbum.getCurrPageURL()).get();
+        JSONObject collectionData = getCollectionData(page);
+        int totalNumItems = collectionData.getInt("total");
+        this.currAlbum.numPages = (int) Math.ceil(
+                (double) totalNumItems / (double) ITEMS_PER_PAGE);
+        this.index = 0;
+        return page;
    }

-    private List<String> ripAlbumAndGetSubalbums(String theUrl) throws IOException {
-        int filesIndex = 0,
-            filesTotal = 0,
-            pageIndex = 0;
-        String currentAlbumPath = null,
-               url = null;
-
-        while (pageIndex == 0 || filesIndex < filesTotal) {
-            if (isStopped()) {
-                break;
-            }
-            pageIndex++;
-            if (pageIndex > 1 || pageResponse == null) {
-                url = theUrl + String.format("?sort=3&page=%d", pageIndex);
-                LOGGER.info(" Retrieving " + url);
-                pageResponse = Http.url(url).response();
-            }
-            Document albumDoc = pageResponse.parse();
-            // Retrieve JSON from request
-            String jsonString = null;
-            for (Element script : albumDoc.select("script[type=text/javascript]")) {
-                String data = script.data();
-                // Ensure this chunk of javascript contains the album info
-                if (!data.contains("libraryAlbumsPageCollectionData")) {
-                    continue;
-                }
-                // Grab the JSON
-                Pattern p; Matcher m;
-                p = Pattern.compile("^.*collectionData: (\\{.*}).*$", Pattern.DOTALL);
-                m = p.matcher(data);
-                if (m.matches()) {
-                    jsonString = m.group(1);
-                    break;
-                }
-            }
-            if (jsonString == null) {
-                LOGGER.error("Unable to find JSON data at URL: " + url);
-                break;
-            }
-            JSONObject json = new JSONObject(jsonString);
-            JSONObject items = json.getJSONObject("items");
-            JSONArray objects = items.getJSONArray("objects");
-            filesTotal = items.getInt("total");
-            currentAlbumPath = json.getString("currentAlbumPath");
-            for (int i = 0; i < objects.length(); i++) {
-                JSONObject object = objects.getJSONObject(i);
-                String image = object.getString("fullsizeUrl");
-                filesIndex += 1;
-                addURLToDownload(new URL(image),
-                        "",
-                        object.getString("location").replaceAll(" ", "_"),
-                        albumDoc.location(),
-                        pageResponse.cookies());
-            }
+    @Override
+    public Document getNextPage(Document page) throws IOException {
+        currAlbum.currPage++;
+        boolean endOfAlbum = currAlbum.currPage > currAlbum.numPages;
+        boolean noMoreSubalbums = albums.isEmpty();
+        if (endOfAlbum && noMoreSubalbums) {
+            throw new IOException("No more pages");
        }
-        // Get subalbums
-        if (url != null) {
-            return getSubAlbums(url, currentAlbumPath);
-        } else {
-            return new ArrayList<>();
-        }
-    }
-
-    private List<String> getSubAlbums(String url, String currentAlbumPath) {
-        List<String> result = new ArrayList<>();
-        String subdomain = url.substring(url.indexOf("://") + 3);
-        subdomain = subdomain.substring(0, subdomain.indexOf("."));
-        String apiUrl = "http://" + subdomain + ".photobucket.com/component/Albums-SubalbumList"
-                + "?deferCollapsed=true"
-                + "&albumPath=" + currentAlbumPath // %2Falbums%2Fab10%2FSpazzySpizzy"
-                + "&json=1";
        try {
-            LOGGER.info("Loading " + apiUrl);
-            JSONObject json = Http.url(apiUrl).getJSON();
-            JSONArray subalbums = json.getJSONObject("body").getJSONArray("subAlbums");
-            for (int i = 0; i < subalbums.length(); i++) {
-                String suburl =
-                        "http://"
-                        + subdomain
-                        + ".photobucket.com"
-                        + subalbums.getJSONObject(i).getString("path");
-                suburl = suburl.replace(" ", "%20");
-                result.add(suburl);
-            }
-        } catch (IOException e) {
-            LOGGER.error("Failed to get subalbums from " + apiUrl, e);
+            Thread.sleep(WAIT_BEFORE_NEXT_PAGE);
+        } catch (InterruptedException e) {
+            LOGGER.info("Interrupted while waiting before getting next page");
+        }
+        if (endOfAlbum) {
+            LOGGER.info("Turning to next album " + albums.get(0).url);
+            return getFirstPage();
+        } else {
+            LOGGER.info("Turning to page " + currAlbum.currPage +
+                    " of album " + currAlbum.url);
+            return Http.url(currAlbum.getCurrPageURL()).get();
        }
-        return result;
    }

-    public boolean canRip(URL url) {
-        return url.getHost().endsWith(DOMAIN);
+
+
+    // Media parsing
+
+
+
+    @Override
+    protected List<String> getURLsFromPage(Document page) {
+        JSONObject collectionData = getCollectionData(page);
+        if (collectionData == null) {
+            LOGGER.error("Unable to find JSON data at URL: " + page.location());
+            return null;
+        } else {
+            return getImageURLs(collectionData);
+        }
    }

+    private JSONObject getCollectionData(Document page) {
+        // Retrieve JSON from a script tag in the returned document
+        for (Element script : page.select("script[type=text/javascript]")) {
+            String data = script.data();
+            // Ensure this chunk of javascript contains the album info
+            if (data.contains("libraryAlbumsPageCollectionData")) {
+                Matcher m = collDataPattern.matcher(data);
+                if (m.matches()) {
+                    // Grab the JSON
+                    return new JSONObject(m.group(1));
+                }
+            }
+        }
+        return null;
+    }
+
+    private List<String> getImageURLs(JSONObject json) {
+        List<String> results = new ArrayList<>();
+        JSONObject items = json.getJSONObject("items");
+        JSONArray objects = items.getJSONArray("objects");
+        for (int i = 0; i < objects.length(); i++) {
+            JSONObject object = objects.getJSONObject(i);
+            String imgURL = object.getString("fullsizeUrl");
+            results.add(imgURL);
+        }
+        return results;
+    }
+
+    @Override
+    protected void downloadURL(URL url, int index) {
+        addURLToDownload(url, getPrefix(++this.index), currAlbum.location);
+    }
+
+
+
+    // helper methods (for album metadata retrieval)
+
+
+
+    private List<AlbumMetadata> getAlbumMetadata(String albumURL)
+            throws IOException {
+        JSONObject data = getAlbumMetadataJSON(albumURL);
+        List<AlbumMetadata> metadata = new ArrayList<>();
+        metadata.add(new AlbumMetadata(data));
+        if (!data.getString("location").equals("")) {
+            // if the location were to equal "", then we are at the profile
+            // page of a user. Ripping all sub-albums here would mean ripping
+            // all albums of a user (not supported; only rip items in a user's
+            // personal bucket).
+            for (JSONObject sub : getSubAlbumJSONs(data)) {
+                metadata.add(new AlbumMetadata(sub));
+            }
+        }
+        LOGGER.info("Successfully retrieved and parsed metadata");
+        return metadata;
+    }
+
+    private JSONObject getAlbumMetadataJSON(String albumURL)
+            throws IOException {
+        String subdomain, user, albumTitle;
+        Matcher m = pbURLPattern.matcher(albumURL);
+        if (!m.matches()) {
+            throw new MalformedURLException("invalid URL " + albumURL);
+        }
+        subdomain = m.group(1);
+        user = m.group(2);
+        albumTitle = m.group(3);
+        if (albumTitle.endsWith("/")) {
+            albumTitle = albumTitle.substring(0, albumTitle.length() - 1);
+        }
+        String apiURL = String.format("http://%s.photobucket.com/api/user/" +
+                "%s/album/%s/get?subAlbums=%d&json=1",
+                subdomain, user, albumTitle, ITEMS_PER_PAGE);
+        LOGGER.info("Loading " + apiURL);
+        JSONObject data = Http.url(apiURL).getJSON().getJSONObject("data");
+        if (data.has("subAlbums")) {
+            int count = data.getInt("subAlbumCount");
+            if (count > ITEMS_PER_PAGE) {
+                apiURL = String.format("http://%s.photobucket.com/api/user/" +
+                        "%s/album/%s/get?subAlbums=%d&json=1",
+                        subdomain, user, albumTitle, count);
+                data = Http.url(apiURL).getJSON().getJSONObject("data");
+            }
+        }
+        return data;
+    }
+
+    private List<JSONObject> getSubAlbumJSONs(JSONObject data) {
+        List<JSONObject> subalbumJSONs = new ArrayList<>();
+        if (data.has("subAlbums")) {
+            JSONArray subalbums = data.getJSONArray("subAlbums");
+            for (int idx = 0; idx < subalbums.length(); idx++) {
+                JSONObject subalbumJSON = subalbums.getJSONObject(idx);
+                subalbumJSONs.add(subalbumJSON);
+            }
+        }
+        return subalbumJSONs;
+    }
+
+    // TODO: Probably want to add queue support for cases like this:
+    // http://s732.photobucket.com/user/doublesix66/library/WARZONE?sort=3&page=1
}
\ No newline at end of file

diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/PhotobucketRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/PhotobucketRipperTest.java
index dff101a0..30885eaa 100644
--- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/PhotobucketRipperTest.java
+++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/PhotobucketRipperTest.java
@@ -15,7 +15,51 @@ public class PhotobucketRipperTest extends RippersTest {
        deleteDir(ripper.getWorkingDir());
    }
    */
-}
-
+    /*
+    // new test, still commented out because of the issue above,
+    // since this test also involves network IO.
+    public void testGetNextPage() throws IOException {
+        // this album should have more than enough sub-albums and pages
+        // to serve as a pretty good iteration test (barring server or
+        // network errors)
+        String baseURL = "http://s1255.photobucket.com/user/mimajki/library/Movie%20gifs?sort=6&page=1";
+        URL url = new URL(baseURL);
+        PhotobucketRipper ripper = new PhotobucketRipper(url);
+        org.jsoup.nodes.Document page = null;
+        try {
+            // I'm not sure it makes much sense that getFirstPage()
+            // is not public while getNextPage() is.
+            java.lang.reflect.Method method = ripper.getClass()
+                    .getDeclaredMethod("getFirstPage");
+            method.setAccessible(true);
+            page = (org.jsoup.nodes.Document) method.invoke(ripper);
+        } catch (Exception e) {
+            e.printStackTrace();
+            fail("Calling getFirstPage() failed");
+        }
+        int numPagesRemaining = 38;
+        for (int idx = 0; idx < numPagesRemaining; idx++) {
+            page = ripper.getNextPage(page);
+            System.out.println("URL: " + page.location());
+        }
+        try {
+            page = ripper.getNextPage(page);
+            fail("Get next page did not throw an exception on the last page");
+        } catch (IOException e) {
+            assertEquals("No more pages", e.getMessage());
+        }
+    }*/
+
+    public void testGetGID() throws IOException {
+        URL url = new URL("http://s732.photobucket.com/user/doublesix66/library/Army%20Painter%20examples?sort=3&page=1");
+        PhotobucketRipper ripper = new PhotobucketRipper(url);
+        assertEquals("doublesix66", ripper.getGID(url));
+        url = new URL("http://s732.photobucket.com/user/doublesix66/library/Army%20Painter%20examples/Painting%20examples?page=1&sort=3");
+        assertEquals("doublesix66", ripper.getGID(url));
+        url = new URL("http://s844.photobucket.com/user/SpazzySpizzy/library/Album%20Covers");
+        assertEquals("SpazzySpizzy", ripper.getGID(url));
+        url = new URL("http://s844.photobucket.com/user/SpazzySpizzy/library");
+        assertEquals("SpazzySpizzy", ripper.getGID(url));
+    }
+}
\ No newline at end of file

From dbf153446bc92627d02fa688bd3ceaa2103b4689 Mon Sep 17 00:00:00 2001
From: cyian-1756
Date: Fri, 2 Nov 2018 14:56:30 -0400
Subject: [PATCH 6/7] Fixed mangadex ripper

---
 .../rarchives/ripme/ripper/rippers/MangadexRipper.java | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/MangadexRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/MangadexRipper.java
index 727a6575..6697a45b 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/MangadexRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/MangadexRipper.java
@@ -19,8 +19,8 @@ import java.util.regex.Pattern;
public class MangadexRipper extends AbstractJSONRipper {
    private String chapterApiEndPoint = "https://mangadex.org/api/chapter/";

-    private String getImageUrl(String chapterHash, String imageName) {
-        return "https://mangadex.org/data/" + chapterHash + "/" + imageName;
+    private String getImageUrl(String chapterHash, String imageName, String server) {
+        return server + chapterHash + "/" + imageName;
    }

    public MangadexRipper(URL url) throws IOException {
@@ -72,11 +72,13 @@
        JSONArray currentObject;

        String chapterHash = json.getString("hash");
+        // Server is the CDN hosting the images.
+        String server = json.getString("server");
        for (int i = 0; i < json.getJSONArray("page_array").length(); i++) {
            currentObject = json.getJSONArray("page_array");

-            assetURLs.add(getImageUrl(chapterHash, currentObject.getString(i)));
+            assetURLs.add(getImageUrl(chapterHash, currentObject.getString(i), server));
        }

        return assetURLs;

From 53f4624ee1e34f9b356952c7bc409c03206c1205 Mon Sep 17 00:00:00 2001
From: Peter Szakacs
Date: Mon, 5 Nov 2018 17:04:23 +0100
Subject: [PATCH 7/7] Moved unresolved comment

---
 .../rarchives/ripme/ripper/rippers/PhotobucketRipper.java | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/PhotobucketRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/PhotobucketRipper.java
index 4906f824..d436af1f 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/PhotobucketRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/PhotobucketRipper.java
@@ -16,6 +16,8 @@ import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;

+// TODO: Probably want to add queue support for cases like this:
+// http://s732.photobucket.com/user/doublesix66/library/WARZONE?sort=3&page=1
public class PhotobucketRipper extends AbstractHTMLRipper {

    private static final String DOMAIN = "photobucket.com",
@@ -281,7 +283,4 @@
        }
        return subalbumJSONs;
    }
-
-    // TODO: Probably want to add queue support for cases like this:
-    // http://s732.photobucket.com/user/doublesix66/library/WARZONE?sort=3&page=1
}
\ No newline at end of file