From 188184bdf2170cb0eba166f4d3036b85bc82d5e6 Mon Sep 17 00:00:00 2001 From: Felix Friebe Date: Sun, 17 Nov 2019 00:42:08 -0600 Subject: [PATCH] Fixed MotherlessRipper Issue #1: If the homepage of a gallery is opened (path /Gxxxx), there's no next page (path /Gxxxx?page=2). If a homepage link is added, the path is now changed to the "All Uploads" page (path /GMxxxx). Issue #2: All paths were changed to https:// since insecure connections didn't work with a user. Other Changes: #1: MotherlessRipper class used to override the rip() method. Now the original method from AbstractHTMLRipper class is used to avoid redundant code. #2: MotherlessRipper class now implements the getNextPage method. Getting the next page was previously done by the rip() method. Also the link of the next page is now read from the rel=next link tag in the HTML head and is not "calculated" anymore. --- .../ripper/rippers/MotherlessRipper.java | 70 ++++++++++--------- 1 file changed, 36 insertions(+), 34 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/MotherlessRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/MotherlessRipper.java index 9b71d756..7bb8451a 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/MotherlessRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/MotherlessRipper.java @@ -16,8 +16,11 @@ import com.rarchives.ripme.ripper.DownloadThreadPool; import com.rarchives.ripme.ui.RipStatusMessage.STATUS; import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Utils; +import org.jsoup.select.Elements; public class MotherlessRipper extends AbstractHTMLRipper { + // All sleep times are in milliseconds + private static final int IMAGE_SLEEP_TIME = 1000; private static final String DOMAIN = "motherless.com", HOST = "motherless"; @@ -46,7 +49,32 @@ public class MotherlessRipper extends AbstractHTMLRipper { @Override protected Document getFirstPage() throws IOException { - return Http.url(url).referrer("http://motherless.com").get(); + 
URL firstURL = this.url; + String path = this.url.getPath(); + // Check if "All Uploads" (/GMxxxx), Image (/GIxxxx) or Video (/GVxxxx) gallery since there's no "next" after the homepage (/Gxxxx) + Pattern p = Pattern.compile("[MIV]"); + Matcher m = p.matcher(String.valueOf(path.charAt(2))); + boolean notHome = m.matches(); + // If it's the homepage go to the "All Uploads" gallery (/Gxxxxx -> /GMxxxxx) + if (!notHome) { + StringBuilder newPath = new StringBuilder(path); + newPath.insert(2, "M"); + firstURL = new URL(this.url, "https://" + DOMAIN + newPath); + LOGGER.info("Changed URL to " + firstURL); + } + return Http.url(firstURL).referrer("https://motherless.com").get(); + } + + @Override + public Document getNextPage(Document doc) throws IOException { + Elements nextPageLink = doc.head().select("link[rel=next]"); + if (nextPageLink.isEmpty()) { + throw new IOException("Last page reached"); + } else { + String referrerLink = doc.head().select("link[rel=canonical]").first().attr("href"); + URL nextURL = new URL(this.url, nextPageLink.first().attr("href")); + return Http.url(nextURL).referrer(referrerLink).get(); + } } @Override @@ -64,7 +92,7 @@ public class MotherlessRipper extends AbstractHTMLRipper { String url; if (!thumbURL.startsWith("http")) { - url = "http://" + DOMAIN + thumbURL; + url = "https://" + DOMAIN + thumbURL; } else { url = thumbURL; } @@ -83,6 +111,11 @@ public class MotherlessRipper extends AbstractHTMLRipper { // Create thread for finding image at "url" page MotherlessImageThread mit = new MotherlessImageThread(url, index); motherlessThreadPool.addThread(mit); + try { + Thread.sleep(IMAGE_SLEEP_TIME); + } catch (InterruptedException e) { + LOGGER.warn("Interrupted while waiting to load next image", e); + } } @Override @@ -112,40 +145,9 @@ public class MotherlessRipper extends AbstractHTMLRipper { if (m.matches()) { return m.group(m.groupCount()); } - throw new MalformedURLException("Expected URL format: http://motherless.com/GIXXXXXXX, got: " 
+ url); + throw new MalformedURLException("Expected URL format: https://motherless.com/GIXXXXXXX, got: " + url); } - @Override - public void rip() throws IOException { - int index = 0, page = 1; - String nextURL = this.url.toExternalForm(); - while (nextURL != null) { - if (isStopped()) { - break; - } - LOGGER.info("Retrieving " + nextURL); - sendUpdate(STATUS.LOADING_RESOURCE, nextURL); - Document doc = getFirstPage(); - List URLs = getURLsFromPage(doc); - - for (String url: URLs) { - downloadURL(new URL(url), index); - index ++; - } - - if (isThisATest()) { - break; - } - // Next page - nextURL = null; - page++; - if (doc.html().contains("?page=" + page)) { - nextURL = this.url.toExternalForm() + "?page=" + page; - } - } - motherlessThreadPool.waitForThreads(); - waitForThreads(); - } /** * Helper class to find and download images found on "image" pages