From c166f93d5753daaefd69b6dd41551b4869647454 Mon Sep 17 00:00:00 2001 From: 4pr0n Date: Sun, 22 Jun 2014 19:12:29 -0700 Subject: [PATCH] Moving from MultiPage ripper to HTML ripper, added JSON ripper --- ...ageRipper.java => AbstractHTMLRipper.java} | 19 +- .../ripme/ripper/AbstractJSONRipper.java | 93 +++++++ .../ripper/AbstractSinglePageRipper.java | 2 +- .../ripper/rippers/DeviantartRipper.java | 4 +- .../ripme/ripper/rippers/DrawcrowdRipper.java | 4 +- .../ripme/ripper/rippers/EHentaiRipper.java | 212 +++++++-------- .../ripper/rippers/EightmusesRipper.java | 253 ++++++++---------- .../ripme/ripper/rippers/FapprovedRipper.java | 126 ++++----- .../ripme/ripper/rippers/FlickrRipper.java | 178 ++++++------ .../ripme/ripper/rippers/GifyoRipper.java | 155 +++++------ .../ripper/rippers/GirlsOfDesireRipper.java | 57 ++-- .../ripme/ripper/rippers/GonewildRipper.java | 132 ++++----- 12 files changed, 658 insertions(+), 577 deletions(-) rename src/main/java/com/rarchives/ripme/ripper/{AbstractMultiPageRipper.java => AbstractHTMLRipper.java} (81%) create mode 100644 src/main/java/com/rarchives/ripme/ripper/AbstractJSONRipper.java diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractMultiPageRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java similarity index 81% rename from src/main/java/com/rarchives/ripme/ripper/AbstractMultiPageRipper.java rename to src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java index 67142866..ddd00cc2 100644 --- a/src/main/java/com/rarchives/ripme/ripper/AbstractMultiPageRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java @@ -10,9 +10,9 @@ import org.jsoup.nodes.Document; import com.rarchives.ripme.ui.RipStatusMessage.STATUS; import com.rarchives.ripme.utils.Utils; -public abstract class AbstractMultiPageRipper extends AlbumRipper { +public abstract class AbstractHTMLRipper extends AlbumRipper { - public AbstractMultiPageRipper(URL url) throws IOException { + public AbstractHTMLRipper(URL url) throws IOException { super(url); } @@ -23,6 +23,9 @@ public abstract class AbstractMultiPageRipper extends AlbumRipper { public abstract Document getNextPage(Document doc) throws IOException; public abstract List getURLsFromPage(Document page); public abstract void downloadURL(URL url, int index); + public DownloadThreadPool getThreadPool() { + return null; + } public boolean keepSortOrder() { return true; @@ -54,19 +57,29 @@ public abstract class AbstractMultiPageRipper extends AlbumRipper { for (String imageURL : imageURLs) { if (isStopped()) { - logger.info("Interrupted"); break; } index += 1; downloadURL(new URL(imageURL), index); } + + if (isStopped()) { + break; + } + try { + sendUpdate(STATUS.LOADING_RESOURCE, "next page"); doc = getNextPage(doc); } catch (IOException e) { logger.info("Can't get next page: " + e.getMessage()); break; } } + + // If they're using a thread pool, wait for it. + if (getThreadPool() != null) { + getThreadPool().waitForThreads(); + } waitForThreads(); } diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractJSONRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractJSONRipper.java new file mode 100644 index 00000000..0641e61a --- /dev/null +++ b/src/main/java/com/rarchives/ripme/ripper/AbstractJSONRipper.java @@ -0,0 +1,93 @@ +package com.rarchives.ripme.ripper; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.List; + +import org.json.JSONObject; + +import com.rarchives.ripme.ui.RipStatusMessage.STATUS; +import com.rarchives.ripme.utils.Utils; + +public abstract class AbstractJSONRipper extends AlbumRipper { + + public AbstractJSONRipper(URL url) throws IOException { + super(url); + } + + public abstract String getDomain(); + public abstract String getHost(); + + public abstract JSONObject getFirstPage() throws IOException; + public abstract JSONObject getNextPage(JSONObject json) throws IOException; + public abstract List getURLsFromJSON(JSONObject json); + public abstract void downloadURL(URL url, int index); + public DownloadThreadPool getThreadPool() { + return null; + } + + public boolean keepSortOrder() { + return true; + } + + @Override + public boolean canRip(URL url) { + return url.getHost().endsWith(getDomain()); + } + + @Override + public URL sanitizeURL(URL url) throws MalformedURLException { + return url; + } + + @Override + public void rip() throws IOException { + int index = 0; + logger.info("Retrieving " + this.url); + sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm()); + JSONObject json = getFirstPage(); + + while (json != null) { + List imageURLs = getURLsFromJSON(json); + + if (imageURLs.size() == 0) { + throw new IOException("No images found at " + this.url); + } + + for (String imageURL : imageURLs) { + if (isStopped()) { + break; + } + index += 1; + downloadURL(new URL(imageURL), index); + } + + if (isStopped()) { + break; + } + + try { + sendUpdate(STATUS.LOADING_RESOURCE, "next page"); + json = getNextPage(json); + } catch (IOException e) { + logger.info("Can't get next page: " + e.getMessage()); + break; + } + } + + // If they're using a thread pool, wait for it. + if (getThreadPool() != null) { + getThreadPool().waitForThreads(); + } + waitForThreads(); + } + + public String getPrefix(int index) { + String prefix = ""; + if (keepSortOrder() && Utils.getConfigBoolean("download.save_order", true)) { + prefix = String.format("%03d_", index); + } + return prefix; + } +} \ No newline at end of file diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractSinglePageRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractSinglePageRipper.java index 8095ef11..37319661 100644 --- a/src/main/java/com/rarchives/ripme/ripper/AbstractSinglePageRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/AbstractSinglePageRipper.java @@ -24,7 +24,7 @@ public abstract class AbstractSinglePageRipper extends AlbumRipper { public abstract void downloadURL(URL url, int index); public boolean keepSortOrder() { - return false; + return true; } @Override diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java index a221ee15..5b201a07 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java @@ -19,12 +19,12 @@ import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; -import com.rarchives.ripme.ripper.AbstractMultiPageRipper; +import com.rarchives.ripme.ripper.AbstractHTMLRipper; import com.rarchives.ripme.utils.Base64; import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Utils; -public class DeviantartRipper extends AbstractMultiPageRipper { +public class DeviantartRipper extends AbstractHTMLRipper { private static final int SLEEP_TIME = 2000; diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/DrawcrowdRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/DrawcrowdRipper.java index 3a313047..83fba022 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/DrawcrowdRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/DrawcrowdRipper.java @@ -12,10 +12,10 @@ import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; -import com.rarchives.ripme.ripper.AbstractMultiPageRipper; +import com.rarchives.ripme.ripper.AbstractHTMLRipper; import com.rarchives.ripme.utils.Http; -public class DrawcrowdRipper extends AbstractMultiPageRipper { +public class DrawcrowdRipper extends AbstractHTMLRipper { public DrawcrowdRipper(URL url) throws IOException { super(url); diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/EHentaiRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/EHentaiRipper.java index 26364cbf..44790487 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/EHentaiRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/EHentaiRipper.java @@ -4,7 +4,9 @@ import java.io.File; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; +import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -13,22 +15,26 @@ import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; -import com.rarchives.ripme.ripper.AlbumRipper; +import com.rarchives.ripme.ripper.AbstractHTMLRipper; import com.rarchives.ripme.ripper.DownloadThreadPool; import com.rarchives.ripme.ui.RipStatusMessage.STATUS; import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Utils; -public class EHentaiRipper extends AlbumRipper { +public class EHentaiRipper extends AbstractHTMLRipper { // All sleep times are in milliseconds - private static final int PAGE_SLEEP_TIME = 3 * 1000; - private static final int IMAGE_SLEEP_TIME = 1 * 1000; - private static final int IP_BLOCK_SLEEP_TIME = 60 * 1000; + private static final int PAGE_SLEEP_TIME = 3000; + private static final int IMAGE_SLEEP_TIME = 1500; + private static final int IP_BLOCK_SLEEP_TIME = 60 * 1000; - private static final String DOMAIN = "g.e-hentai.org", HOST = "e-hentai"; + private String lastURL = null; // Thread pool for finding direct image links from "image" pages (html) private DownloadThreadPool ehentaiThreadPool = new DownloadThreadPool("ehentai"); + @Override + public DownloadThreadPool getThreadPool() { + return ehentaiThreadPool; + } // Current HTML document private Document albumDoc = null; @@ -45,25 +51,22 @@ public class EHentaiRipper extends AlbumRipper { @Override public String getHost() { - return HOST; + return "e-hentai"; } - - public URL sanitizeURL(URL url) throws MalformedURLException { - return url; + + @Override + public String getDomain() { + return "g.e-hentai.org"; } public String getAlbumTitle(URL url) throws MalformedURLException { try { // Attempt to use album title as GID if (albumDoc == null) { - sendUpdate(STATUS.LOADING_RESOURCE, url.toString()); - logger.info("Retrieving " + url); - albumDoc = Http.url(url) - .cookies(cookies) - .get(); + albumDoc = getPageWithRetries(url); } Elements elems = albumDoc.select("#gn"); - return HOST + "_" + elems.get(0).text(); + return getHost() + "_" + elems.first().text(); } catch (Exception e) { // Fall back to default album naming convention logger.warn("Failed to get album title from " + url, e); @@ -87,95 +90,97 @@ public class EHentaiRipper extends AlbumRipper { + "http://g.e-hentai.org/g/####/####/" + " Got: " + url); } - - @Override - public void rip() throws IOException { - int index = 0, retries = 3; - String nextUrl = this.url.toExternalForm(); + + /** + * Attempts to get page, checks for IP ban, waits. + * @param url + * @return Page document + * @throws IOException If page loading errors, or if retries are exhausted + */ + private Document getPageWithRetries(URL url) throws IOException { + Document doc; + int retries = 3; while (true) { - if (isStopped()) { - break; - } - if (albumDoc == null) { - logger.info(" Retrieving album page " + nextUrl); - sendUpdate(STATUS.LOADING_RESOURCE, nextUrl); - albumDoc = Http.url(nextUrl) - .referrer(this.url) - .cookies(cookies) - .get(); - } - // Check for rate limiting - if (albumDoc.toString().contains("IP address will be automatically banned")) { + sendUpdate(STATUS.LOADING_RESOURCE, url.toExternalForm()); + logger.info("Retrieving " + url); + doc = Http.url(url) + .referrer(this.url) + .cookies(cookies) + .get(); + if (doc.toString().contains("IP address will be automatically banned")) { if (retries == 0) { - logger.error("Hit rate limit and maximum number of retries, giving up"); - break; + throw new IOException("Hit rate limit and maximum number of retries, giving up"); } - logger.warn("Hit rate limit while loading " + nextUrl + ", sleeping for " + IP_BLOCK_SLEEP_TIME + "ms, " + retries + " retries remaining"); + logger.warn("Hit rate limit while loading " + url + ", sleeping for " + IP_BLOCK_SLEEP_TIME + "ms, " + retries + " retries remaining"); retries--; try { Thread.sleep(IP_BLOCK_SLEEP_TIME); } catch (InterruptedException e) { - logger.error("Interrupted while waiting for rate limit to subside", e); - break; - } - albumDoc = null; - continue; - } - // Find thumbnails - Elements thumbs = albumDoc.select("#gdt > .gdtm a"); - if (thumbs.size() == 0) { - logger.info("albumDoc: " + albumDoc); - logger.info("No images found at " + nextUrl); - break; - } - // Iterate over images on page - for (Element thumb : thumbs) { - if (isStopped()) { - break; - } - index++; - EHentaiImageThread t = new EHentaiImageThread(new URL(thumb.attr("href")), index, this.workingDir); - ehentaiThreadPool.addThread(t); - try { - Thread.sleep(IMAGE_SLEEP_TIME); - } catch (InterruptedException e) { - logger.warn("Interrupted while waiting to load next image", e); + throw new IOException("Interrupted while waiting for rate limit to subside"); } } - - if (isStopped()) { - break; - } - // Find next page - Elements hrefs = albumDoc.select(".ptt a"); - if (hrefs.size() == 0) { - logger.info("No navigation links found at " + nextUrl); - break; - } - // Ensure next page is different from the current page - String lastUrl = nextUrl; - nextUrl = hrefs.last().attr("href"); - if (lastUrl.equals(nextUrl)) { - break; // We're on the last page - } - - // Reset albumDoc so we fetch the page next time - albumDoc = null; - - // Sleep before loading next page - try { - Thread.sleep(PAGE_SLEEP_TIME); - } catch (InterruptedException e) { - logger.error("Interrupted while waiting to load next page", e); - break; + else { + return doc; } } - - waitForThreads(); } - public boolean canRip(URL url) { - return url.getHost().endsWith(DOMAIN); + @Override + public Document getFirstPage() throws IOException { + if (albumDoc == null) { + albumDoc = getPageWithRetries(this.url); + } + this.lastURL = this.url.toExternalForm(); + return albumDoc; + } + + @Override + public Document getNextPage(Document doc) throws IOException { + // Check if we've stopped + if (isStopped()) { + throw new IOException("Ripping interrupted"); + } + // Find next page + Elements hrefs = doc.select(".ptt a"); + if (hrefs.size() == 0) { + logger.info("doc: " + doc.html()); + throw new IOException("No navigation links found"); + } + // Ensure next page is different from the current page + String nextURL = hrefs.last().attr("href"); + if (nextURL.equals(this.lastURL)) { + logger.info("lastURL = nextURL : " + nextURL); + throw new IOException("Reached last page of results"); + } + // Sleep before loading next page + sleep(PAGE_SLEEP_TIME); + // Load next page + Document nextPage = getPageWithRetries(new URL(nextURL)); + this.lastURL = nextURL; + return nextPage; + } + + @Override + public List getURLsFromPage(Document page) { + List imageURLs = new ArrayList(); + Elements thumbs = page.select("#gdt > .gdtm a"); + // Iterate over images on page + for (Element thumb : thumbs) { + imageURLs.add(thumb.attr("href")); + } + return imageURLs; + } + + @Override + public void downloadURL(URL url, int index) { + EHentaiImageThread t = new EHentaiImageThread(url, index, this.workingDir); + ehentaiThreadPool.addThread(t); + try { + Thread.sleep(IMAGE_SLEEP_TIME); + } + catch (InterruptedException e) { + logger.warn("Interrupted while waiting to load next image", e); + } } /** @@ -187,7 +192,6 @@ public class EHentaiRipper extends AlbumRipper { private URL url; private int index; private File workingDir; - private int retries = 3; public EHentaiImageThread(URL url, int index, File workingDir) { super(); @@ -203,27 +207,7 @@ public class EHentaiRipper extends AlbumRipper { private void fetchImage() { try { - Document doc = Http.url(this.url) - .referrer(this.url) - .cookies(cookies) - .get(); - // Check for rate limit - if (doc.toString().contains("IP address will be automatically banned")) { - if (this.retries == 0) { - logger.error("Rate limited & ran out of retries, skipping image at " + this.url); - return; - } - logger.warn("Hit rate limit. Sleeping for " + IP_BLOCK_SLEEP_TIME + "ms"); - try { - Thread.sleep(IP_BLOCK_SLEEP_TIME); - } catch (InterruptedException e) { - logger.error("Interrupted while waiting for rate limit to subside", e); - return; - } - this.retries--; - fetchImage(); // Re-attempt to download the image - return; - } + Document doc = getPageWithRetries(this.url); // Find image Elements images = doc.select(".sni > a > img"); diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java index d79e1f35..b766442e 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/EightmusesRipper.java @@ -1,168 +1,41 @@ package com.rarchives.ripme.ripper.rippers; -import java.io.File; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.jsoup.Connection.Response; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; -import com.rarchives.ripme.ripper.AlbumRipper; +import com.rarchives.ripme.ripper.AbstractSinglePageRipper; import com.rarchives.ripme.ui.RipStatusMessage.STATUS; import com.rarchives.ripme.utils.Http; -import com.rarchives.ripme.utils.Utils; -public class EightmusesRipper extends AlbumRipper { - - private static final String DOMAIN = "8muses.com", - HOST = "8muses"; +public class EightmusesRipper extends AbstractSinglePageRipper { private Document albumDoc = null; + private Map cookies = new HashMap(); public EightmusesRipper(URL url) throws IOException { super(url); } - @Override - public boolean canRip(URL url) { - return url.getHost().endsWith(DOMAIN); - } - - @Override - public URL sanitizeURL(URL url) throws MalformedURLException { - return url; - } - - @Override - public String getAlbumTitle(URL url) throws MalformedURLException { - try { - // Attempt to use album title as GID - if (albumDoc == null) { - albumDoc = Http.url(url).get(); - } - Element titleElement = albumDoc.select("meta[name=description]").first(); - String title = titleElement.attr("content"); - title = title.substring(title.lastIndexOf('/') + 1); - return HOST + "_" + title.trim(); - } catch (IOException e) { - // Fall back to default album naming convention - logger.info("Unable to find title at " + url); - } - return super.getAlbumTitle(url); - } - - @Override - public void rip() throws IOException { - ripAlbum(this.url.toExternalForm(), this.workingDir); - waitForThreads(); - } - - private void ripAlbum(String url, File subdir) throws IOException { - logger.info(" Retrieving " + url); - sendUpdate(STATUS.LOADING_RESOURCE, url); - if (albumDoc == null) { - albumDoc = Http.url(url).get(); - } - - int index = 0; // Both album index and image index - if (albumDoc.select(".preview > span").size() > 0) { - // Page contains subalbums (not images) - for (Element subalbum : albumDoc.select("a.preview")) { - ripSubalbumFromPreview(subalbum, subdir, ++index); - } - } - else { - // Page contains images - for (Element thumb : albumDoc.select("img")) { - downloadImage(thumb, subdir, ++index); - } - } - } - - /** - * @param subalbum Anchor element of a subalbum - * @throws IOException - */ - private void ripSubalbumFromPreview(Element subalbum, File subdir, int index) throws IOException { - // Find + sanitize URL from Element - String subUrl = subalbum.attr("href"); - subUrl = subUrl.replaceAll("\\.\\./", ""); - if (subUrl.startsWith("//")) { - subUrl = "http:"; - } - else if (!subUrl.startsWith("http://")) { - subUrl = "http://www.8muses.com/" + subUrl; - } - // Prepend image index if enabled - // Get album title - String subTitle = subalbum.attr("alt"); - if (subTitle.equals("")) { - subTitle = getGID(new URL(subUrl)); - } - subTitle = Utils.filesystemSafe(subTitle); - // Create path to subdirectory - File subDir = new File(subdir.getAbsolutePath() + File.separator + subTitle); - if (!subDir.exists()) { - subDir.mkdirs(); - } - albumDoc = null; - ripAlbum(subUrl, subDir); - try { - Thread.sleep(2000); - } catch (InterruptedException e) { - logger.warn("Interrupted whiel waiting to load next album"); - } - } - - private void downloadImage(Element thumb, File subdir, int index) { - // Find thumbnail image source - String image = null; - if (thumb.hasAttr("data-cfsrc")) { - image = thumb.attr("data-cfsrc"); - } - else if (thumb.hasAttr("src")) { - image = thumb.attr("src"); - } - else { - logger.warn("Thumb does not havedata-cfsrc or src: " + thumb); - return; - } - // Remove relative directory path naming - image = image.replaceAll("\\.\\./", ""); - if (image.startsWith("//")) { - image = "http:" + image; - } - // Convert from thumb URL to full-size - if (image.contains("-cu_")) { - image = image.replaceAll("-cu_[^.]+", "-me"); - } - // Set download path - try { - URL imageURL = new URL(image); - String saveAs = subdir.getAbsolutePath() + File.separator; - if (Utils.getConfigBoolean("download.save_order", true)) { - // Append image index - saveAs += String.format("%03d_", index); - } - // Append image title - saveAs += Utils.filesystemSafe(thumb.attr("title")); - // Append extension - saveAs += image.substring(image.lastIndexOf('.')); - File saveFile = new File(saveAs); - // Download - addURLToDownload(imageURL, saveFile, thumb.baseUri(), null); - } catch (IOException e) { - logger.error("Failed to download image at " + image, e); - sendUpdate(STATUS.DOWNLOAD_ERRORED, "Failed to download image at " + image); - } - } - @Override public String getHost() { - return HOST; + return "8muses"; + } + @Override + public String getDomain() { + return "8muses.com"; } @Override @@ -175,4 +48,100 @@ public class EightmusesRipper extends AlbumRipper { return m.group(m.groupCount()); } + @Override + public String getAlbumTitle(URL url) throws MalformedURLException { + try { + // Attempt to use album title as GID + Element titleElement = getFirstPage().select("meta[name=description]").first(); + String title = titleElement.attr("content"); + title = title.substring(title.lastIndexOf('/') + 1); + return getHost() + "_" + title.trim(); + } catch (IOException e) { + // Fall back to default album naming convention + logger.info("Unable to find title at " + url); + } + return super.getAlbumTitle(url); + } + + @Override + public Document getFirstPage() throws IOException { + if (albumDoc == null) { + Response resp = Http.url(url).response(); + cookies.putAll(resp.cookies()); + albumDoc = resp.parse(); + } + return albumDoc; + } + + @Override + public List getURLsFromPage(Document page) { + List imageURLs = new ArrayList(); + if (page.select(".preview > span").size() > 0) { + // Page contains subalbums (not images) + Elements albumElements = page.select("a.preview"); + List albumsList = albumElements.subList(0, albumElements.size()); + Collections.reverse(albumsList); + // Iterate over elements in reverse order + for (Element subalbum : albumsList) { + String subUrl = subalbum.attr("href"); + subUrl = subUrl.replaceAll("\\.\\./", ""); + if (subUrl.startsWith("//")) { + subUrl = "http:"; + } + else if (!subUrl.startsWith("http://")) { + subUrl = "http://www.8muses.com/" + subUrl; + } + try { + logger.info("Retrieving " + subUrl); + sendUpdate(STATUS.LOADING_RESOURCE, subUrl); + Document subPage = Http.url(subUrl).get(); + // Get all images in subalbum, add to list. + List subalbumImages = getURLsFromPage(subPage); + logger.info("Found " + subalbumImages.size() + " images in subalbum"); + imageURLs.addAll(subalbumImages); + } catch (IOException e) { + logger.warn("Error while loading subalbum " + subUrl, e); + continue; + } + } + } + else { + // Page contains images + for (Element thumb : page.select("img")) { + // Find thumbnail image source + String image = null; + if (thumb.hasAttr("data-cfsrc")) { + image = thumb.attr("data-cfsrc"); + } + else if (thumb.hasAttr("src")) { + image = thumb.attr("src"); + } + else { + logger.warn("Thumb does not have data-cfsrc or src: " + thumb); + continue; + } + // Remove relative directory path naming + image = image.replaceAll("\\.\\./", ""); + if (image.startsWith("//")) { + image = "http:" + image; + } + // Convert from thumb URL to full-size + if (image.contains("-cu_")) { + image = image.replaceAll("-cu_[^.]+", "-me"); + } + imageURLs.add(image); + } + } + return imageURLs; + } + + @Override + public void downloadURL(URL url, int index) { + addURLToDownload(url, getPrefix(index), "", this.url.toExternalForm(), cookies); + } + + @Override + public String getPrefix(int index) { + return String.format("%03d_", index); + } } diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/FapprovedRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/FapprovedRipper.java index 79185d49..3a875e3a 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/FapprovedRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/FapprovedRipper.java @@ -3,82 +3,33 @@ package com.rarchives.ripme.ripper.rippers; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; +import java.util.ArrayList; +import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; -import com.rarchives.ripme.ripper.AlbumRipper; -import com.rarchives.ripme.ui.RipStatusMessage.STATUS; +import com.rarchives.ripme.ripper.AbstractHTMLRipper; import com.rarchives.ripme.utils.Http; -import com.rarchives.ripme.utils.Utils; -public class FapprovedRipper extends AlbumRipper { +public class FapprovedRipper extends AbstractHTMLRipper { - private static final String DOMAIN = "fapproved.com", - HOST = "fapproved"; + private int pageIndex = 1; + private String username = null; public FapprovedRipper(URL url) throws IOException { super(url); } - @Override - public boolean canRip(URL url) { - return (url.getHost().endsWith(DOMAIN)); - } - - @Override - public URL sanitizeURL(URL url) throws MalformedURLException { - Pattern p = Pattern.compile("^https?://fapproved\\.com/users/([a-zA-Z0-9\\-_]{1,}).*$"); - Matcher m = p.matcher(url.toExternalForm()); - if (m.matches()) { - return new URL("http://fapproved.com/users/" + m.group(1)); - } - throw new MalformedURLException("Expected username in URL (fapproved.com/users/username and not " + url); - } - @Override - public void rip() throws IOException { - int index = 0, page = 0; - String url, user = getGID(this.url); - boolean hasNextPage = true; - while (hasNextPage) { - page++; - url = "http://fapproved.com/users/" + user + "/images?page=" + page; - this.sendUpdate(STATUS.LOADING_RESOURCE, url); - logger.info(" Retrieving " + url); - Document doc = Http.url(url) - .ignoreContentType() - .get(); - for (Element image : doc.select("div.actual-image img")) { - String imageUrl = image.attr("src"); - if (imageUrl.startsWith("//")) { - imageUrl = "http:" + imageUrl; - } - index++; - String prefix = ""; - if (Utils.getConfigBoolean("download.save_order", true)) { - prefix = String.format("%03d_", index); - } - addURLToDownload(new URL(imageUrl), prefix); - } - if ( (doc.select("div.pagination li.next.disabled").size() != 0) - || (doc.select("div.pagination").size() == 0) ) { - break; - } - try { - Thread.sleep(3000); - } catch (InterruptedException e) { - logger.error("[!] Interrupted while waiting to load next album:", e); - break; - } - } - waitForThreads(); - } - @Override public String getHost() { - return HOST; + return "fapproved"; + } + @Override + public String getDomain() { + return "fapproved.com"; } @Override @@ -86,9 +37,62 @@ public class FapprovedRipper extends AlbumRipper { Pattern p = Pattern.compile("^https?://[w.]*fapproved.com/users/([a-zA-Z0-9\\-_]{3,}).*$"); Matcher m = p.matcher(url.toExternalForm()); if (m.matches()) { - return m.group(1); + username = m.group(1); + return username; } throw new MalformedURLException("Fapproved user not found in " + url + ", expected http://fapproved.com/users/username/images"); } + @Override + public URL sanitizeURL(URL url) throws MalformedURLException { + return new URL("http://fapproved.com/users/" + getGID(url)); + } + + @Override + public Document getFirstPage() throws IOException { + pageIndex = 1; + String pageURL = getPageURL(pageIndex); + return Http.url(pageURL) + .ignoreContentType() + .get(); + } + + @Override + public Document getNextPage(Document doc) throws IOException { + if ( (doc.select("div.pagination li.next.disabled").size() != 0) + || (doc.select("div.pagination").size() == 0) ) { + throw new IOException("No more pages found"); + } + sleep(1000); + pageIndex++; + String pageURL = getPageURL(pageIndex); + return Http.url(pageURL) + .ignoreContentType() + .get(); + } + + private String getPageURL(int index) throws IOException { + if (username == null) { + username = getGID(this.url); + } + return "http://fapproved.com/users/" + username + "/images?page=" + pageIndex; + } + + @Override + public List getURLsFromPage(Document page) { + List imageURLs = new ArrayList(); + for (Element image : page.select("div.actual-image img")) { + String imageURL = image.attr("src"); + if (imageURL.startsWith("//")) { + imageURL = "http:" + imageURL; + } + imageURLs.add(imageURL); + } + return imageURLs; + } + + @Override + public void downloadURL(URL url, int index) { + addURLToDownload(url, getPrefix(index)); + } } diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java index 00cf63a8..19c72ffe 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java @@ -3,8 +3,10 @@ package com.rarchives.ripme.ripper.rippers; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; +import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; +import java.util.List; import java.util.Map; import java.util.Set; import java.util.regex.Matcher; @@ -17,19 +19,22 @@ import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; -import com.rarchives.ripme.ripper.AlbumRipper; +import com.rarchives.ripme.ripper.AbstractHTMLRipper; import com.rarchives.ripme.ripper.DownloadThreadPool; import com.rarchives.ripme.utils.Base64; import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Utils; -public class FlickrRipper extends AlbumRipper { +public class FlickrRipper extends AbstractHTMLRipper { - private static final String DOMAIN = "flickr.com", - HOST = "flickr"; - - private DownloadThreadPool flickrThreadPool; + private int page = 1; + private Set attempted = new HashSet(); private Document albumDoc = null; + private DownloadThreadPool flickrThreadPool; + @Override + public DownloadThreadPool getThreadPool() { + return flickrThreadPool; + } public FlickrRipper(URL url) throws IOException { super(url); @@ -38,7 +43,11 @@ public class FlickrRipper extends AlbumRipper { @Override public String getHost() { - return HOST; + return "flickr"; + } + @Override + public String getDomain() { + return "flickr.com"; } public URL sanitizeURL(URL url) throws MalformedURLException { @@ -61,15 +70,13 @@ public class FlickrRipper extends AlbumRipper { } try { // Attempt to use album title as GID - if (albumDoc == null) { - albumDoc = Http.url(url).get(); - } + Document doc = getFirstPage(); String user = url.toExternalForm(); user = user.substring(user.indexOf("/photos/") + "/photos/".length()); user = user.substring(0, user.indexOf("/")); - String title = albumDoc.select("meta[name=description]").get(0).attr("content"); + String title = doc.select("meta[name=description]").get(0).attr("content"); if (!title.equals("")) { - return HOST + "_" + user + "_" + title; + return getHost() + "_" + user + "_" + title; } } catch (Exception e) { // Fall back to default album naming convention @@ -114,79 +121,79 @@ public class FlickrRipper extends AlbumRipper { } @Override - public void rip() throws IOException { - //Map cookies = signinToFlickr(); - Set attempted = new HashSet(); - int index = 0, page = 1; - String nextURL = this.url.toExternalForm(); - while (true) { - if (isStopped()) { - break; - } - logger.info(" Retrieving " + nextURL); - if (albumDoc == null) { - albumDoc = Http.url(nextURL).get(); - } - for (Element thumb : albumDoc.select("a[data-track=photo-click]")) { - String imageTitle = null; - if (thumb.hasAttr("title")) { - imageTitle = thumb.attr("title"); - } - String imagePage = thumb.attr("href"); - if (imagePage.startsWith("/")) { - imagePage = "http://www.flickr.com" + imagePage; - } - if (imagePage.contains("/in/")) { - imagePage = imagePage.substring(0, imagePage.indexOf("/in/") + 1); - } - if (!imagePage.endsWith("/")) { - imagePage += "/"; - } - imagePage += "sizes/o/"; - - // Check for duplicates - if (attempted.contains(imagePage)) { - continue; - } - attempted.add(imagePage); - - index += 1; - // Add image page to threadpool to grab the image & download it - FlickrImageThread mit = new FlickrImageThread(new URL(imagePage), imageTitle, index); - flickrThreadPool.addThread(mit); - } - // Find how many pages there are - int lastPage = 0; - for (Element apage : albumDoc.select("a[data-track^=page-]")) { - String lastPageStr = apage.attr("data-track").replace("page-", ""); - lastPage = Integer.parseInt(lastPageStr); - } - // If we're at the last page, stop. - if (page >= lastPage) { - break; - } - // Load the next page - page++; - albumDoc = null; - nextURL = this.url.toExternalForm(); - if (!nextURL.endsWith("/")) { - nextURL += "/"; - } - nextURL += "page" + page + "/"; - // Wait a bit - try { - Thread.sleep(1000); - } catch (InterruptedException e) { - logger.error("Interrupted while waiting to load next page " + nextURL, e); - break; - } + public Document getFirstPage() throws IOException { + if (albumDoc == null) { + albumDoc = Http.url(url).get(); } - flickrThreadPool.waitForThreads(); - waitForThreads(); + return albumDoc; } - public boolean canRip(URL url) { - return url.getHost().endsWith(DOMAIN); + @Override + public Document getNextPage(Document doc) throws IOException { + // Find how many pages there are + int lastPage = 0; + for (Element apage : doc.select("a[data-track^=page-]")) { + String lastPageStr = apage.attr("data-track").replace("page-", ""); + lastPage = Integer.parseInt(lastPageStr); + } + // If we're at the last page, stop. + if (page >= lastPage) { + throw new IOException("No more pages"); + } + // Load the next page + page++; + albumDoc = null; + String nextURL = this.url.toExternalForm(); + if (!nextURL.endsWith("/")) { + nextURL += "/"; + } + nextURL += "page" + page + "/"; + // Wait a bit + try { + Thread.sleep(1000); + } catch (InterruptedException e) { + throw new IOException("Interrupted while waiting to load next page " + nextURL); + } + return Http.url(nextURL).get(); + } + + @Override + public List getURLsFromPage(Document page) { + List imageURLs = new ArrayList(); + for (Element thumb : page.select("a[data-track=photo-click]")) { + /* TODO find a way to persist the image title + String imageTitle = null; + if (thumb.hasAttr("title")) { + imageTitle = thumb.attr("title"); + } + */ + String imagePage = thumb.attr("href"); + if (imagePage.startsWith("/")) { + imagePage = "http://www.flickr.com" + imagePage; + } + if (imagePage.contains("/in/")) { + imagePage = imagePage.substring(0, imagePage.indexOf("/in/") + 1); + } + if (!imagePage.endsWith("/")) { + imagePage += "/"; + } + imagePage += "sizes/o/"; + + // Check for duplicates + if (attempted.contains(imagePage)) { + continue; + } + attempted.add(imagePage); + imageURLs.add(imagePage); + } + return imageURLs; + } + + @Override + public void downloadURL(URL url, int index) { + // Add image page to threadpool to grab the image & download it + FlickrImageThread mit = new FlickrImageThread(url, index); + flickrThreadPool.addThread(mit); } /** @@ -224,13 +231,11 @@ public class FlickrRipper extends AlbumRipper { */ private class FlickrImageThread extends Thread { private URL url; - private String title; private int index; - public FlickrImageThread(URL url, String title, int index) { + public FlickrImageThread(URL url, int index) { super(); this.url = url; - this.title = title; this.index = index; } @@ -248,9 +253,8 @@ public class FlickrRipper extends AlbumRipper { if (Utils.getConfigBoolean("download.save_order", true)) { prefix = String.format("%03d_", index); } - prefix += Utils.filesystemSafe(title); synchronized (flickrThreadPool) { - addURLToDownload(new URL(fullsizeImages.get(0).attr("src")), prefix); + addURLToDownload(new URL(fullsizeImages.first().attr("src")), prefix); } } } catch (IOException e) { diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/GifyoRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/GifyoRipper.java index a1f92075..93e22d86 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/GifyoRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/GifyoRipper.java @@ -3,7 +3,9 @@ package com.rarchives.ripme.ripper.rippers; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; +import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -12,99 +14,27 @@ import org.jsoup.Connection.Method; import org.jsoup.Connection.Response; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; -import org.jsoup.select.Elements; -import com.rarchives.ripme.ripper.AlbumRipper; +import com.rarchives.ripme.ripper.AbstractHTMLRipper; import com.rarchives.ripme.ui.RipStatusMessage.STATUS; import com.rarchives.ripme.utils.Http; -public class GifyoRipper extends AlbumRipper { +public class GifyoRipper extends AbstractHTMLRipper { - private static final String DOMAIN = "gifyo.com", - HOST = "gifyo"; + private int page = 0; + private Map cookies = new HashMap(); public GifyoRipper(URL url) throws IOException { super(url); } - @Override - public boolean canRip(URL url) { - return (url.getHost().endsWith(DOMAIN)); - } - - @Override - public URL sanitizeURL(URL url) throws MalformedURLException { - Pattern p = Pattern.compile("^https?://gifyo\\.com/([a-zA-Z0-9\\-_]+)/?$"); - Matcher m = p.matcher(url.toExternalForm()); - if (m.matches()) { - return new URL("http://gifyo.com/" + m.group(1) + "/"); - } - throw new MalformedURLException("Expected username in URL (gifyo.com/username/ and not " + url); - } - @Override - public void rip() throws IOException { - int page = 0; - Map cookies = new HashMap(); - while (true) { - this.sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm() + " (page #" + page + ")"); - logger.info(" Retrieving " + this.url + "(page #" + page + ")"); - Response resp = null; - if (page == 0) { - resp = Http.url(this.url) - .ignoreContentType() - .response(); - cookies = resp.cookies(); - } - else { - Map postData = new HashMap(); - postData.put("cmd", "refreshData"); - postData.put("view", "gif"); - postData.put("layout", "grid"); - postData.put("page", Integer.toString(page)); - resp = Http.url(this.url) - .ignoreContentType() - .data(postData) - .cookies(cookies) - .method(Method.POST) - .response(); - cookies.putAll(resp.cookies()); - } - Document doc = resp.parse(); - Elements images = doc.select("div.gif img"); - logger.info("Found " + images.size() + " images"); - for (Element image : images) { - String imageUrl = image.attr("src"); - if (imageUrl.startsWith("//")) { - imageUrl = "http:" + imageUrl; - } - imageUrl = imageUrl.replace("/medium/", "/large/"); - imageUrl = imageUrl.replace("_s.gif", ".gif"); - addURLToDownload(new URL(imageUrl)); - } - if (images.size() == 0) { - if (doc.html().contains("profile is private")) { - sendUpdate(STATUS.RIP_ERRORED, "User has private profile"); - throw new IOException("User has private profile"); - } - else { - logger.info("Page " + page + " has 0 images"); - } - break; - } - try { - Thread.sleep(3000); - } catch (InterruptedException e) { - logger.error("[!] Interrupted while waiting to load next album:", e); - break; - } - page++; - } - waitForThreads(); - } - @Override public String getHost() { - return HOST; + return "gifyo"; + } + @Override + public String getDomain() { + return "gifyo.com"; } @Override @@ -117,4 +47,67 @@ public class GifyoRipper extends AlbumRipper { throw new MalformedURLException("Gifyo user not found in " + url + ", expected http://gifyo.com/username"); } + @Override + public URL sanitizeURL(URL url) throws MalformedURLException { + return new URL("http://gifyo.com/" + getGID(url) + "/"); + } + + @Override + public Document getFirstPage() throws IOException { + Response resp = Http.url(this.url) + .ignoreContentType() + .response(); + cookies = resp.cookies(); + + Document doc = resp.parse(); + if (doc.html().contains("profile is private")) { + sendUpdate(STATUS.RIP_ERRORED, "User has private profile"); + throw new IOException("User has private profile"); + } + return doc; + } + + @Override + public Document getNextPage(Document doc) throws IOException { + page++; + Map postData = new HashMap(); + postData.put("cmd", "refreshData"); + postData.put("view", "gif"); + postData.put("layout", "grid"); + postData.put("page", Integer.toString(page)); + Response resp = Http.url(this.url) + .ignoreContentType() + .data(postData) + .cookies(cookies) + .method(Method.POST) + .response(); + cookies.putAll(resp.cookies()); + Document nextDoc = resp.parse(); + if (nextDoc.select("div.gif img").size() == 0) { + throw new IOException("No more images found"); + } + sleep(2000); + return nextDoc; + } + + @Override + public List getURLsFromPage(Document doc) { + List imageURLs = new ArrayList(); + for (Element image : doc.select("div.gif img")) { + String imageUrl = image.attr("src"); + if (imageUrl.startsWith("//")) { + imageUrl = "http:" + imageUrl; + } + imageUrl = imageUrl.replace("/medium/", "/large/"); + imageUrl = imageUrl.replace("_s.gif", ".gif"); + imageURLs.add(imageUrl); + } + logger.info("Found " + imageURLs.size() + " images"); + return imageURLs; + } + + @Override + public void downloadURL(URL url, int index) { + addURLToDownload(url); + } } diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/GirlsOfDesireRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/GirlsOfDesireRipper.java index 84ea2936..f5ffd6c2 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/GirlsOfDesireRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/GirlsOfDesireRipper.java @@ -3,6 +3,8 @@ package com.rarchives.ripme.ripper.rippers; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; +import java.util.ArrayList; +import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -10,16 +12,14 @@ import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; -import com.rarchives.ripme.ripper.AlbumRipper; +import com.rarchives.ripme.ripper.AbstractSinglePageRipper; import com.rarchives.ripme.ui.RipStatusMessage.STATUS; import com.rarchives.ripme.utils.Http; -public class GirlsOfDesireRipper extends AlbumRipper { +public class GirlsOfDesireRipper extends AbstractSinglePageRipper { // All sleep times are in milliseconds private static final int IMAGE_SLEEP_TIME = 100; - private static final String DOMAIN = "girlsofdesire.org", HOST = "GirlsOfDesire"; - // Current HTML document private Document albumDoc = null; @@ -29,23 +29,19 @@ public class GirlsOfDesireRipper extends AlbumRipper { @Override public String getHost() { - return HOST; + return "GirlsOfDesire"; } - - public URL sanitizeURL(URL url) throws MalformedURLException { - return url; + @Override + public String getDomain() { + return "girlsofdesire.org"; } public String getAlbumTitle(URL url) throws MalformedURLException { try { // Attempt to use album title as GID - if (albumDoc == null) { - logger.info(" Retrieving " + url.toExternalForm()); - sendUpdate(STATUS.LOADING_RESOURCE, url.toString()); - albumDoc = Http.url(url).get(); - } - Elements elems = albumDoc.select(".albumName"); - return HOST + "_" + elems.first().text(); + Document doc = getFirstPage(); + Elements elems = doc.select(".albumName"); + return getHost() + "_" + elems.first().text(); } catch (Exception e) { // Fall back to default album naming convention logger.warn("Failed to get album title from " + url, e); @@ -69,6 +65,33 @@ public class GirlsOfDesireRipper extends AlbumRipper { + "http://www.girlsofdesire.org/galleries//" + " Got: " + url); } + + @Override + public Document getFirstPage() throws IOException { + if (albumDoc == null) { + albumDoc = Http.url(url).get(); + } + return albumDoc; + } + + @Override + public List getURLsFromPage(Document doc) { + List imageURLs = new ArrayList(); + for (Element thumb : doc.select("td.vtop > a > img")) { + String imgSrc = thumb.attr("src"); + imgSrc = imgSrc.replaceAll("_thumb\\.", "."); + if (imgSrc.startsWith("/")) { + imgSrc = "http://www.girlsofdesire.org" + imgSrc; + } + imageURLs.add(imgSrc); + } + return imageURLs; + } + + @Override + public void downloadURL(URL url, int index) { + addURLToDownload(url, getPrefix(index)); + } @Override public void rip() throws IOException { @@ -107,8 +130,4 @@ public class GirlsOfDesireRipper extends AlbumRipper { waitForThreads(); } - - public boolean canRip(URL url) { - return url.getHost().endsWith(DOMAIN); - } } \ No newline at end of file diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/GonewildRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/GonewildRipper.java index eb072186..0341b274 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/GonewildRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/GonewildRipper.java @@ -3,21 +3,22 @@ package com.rarchives.ripme.ripper.rippers; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; +import java.util.ArrayList; +import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.json.JSONArray; import org.json.JSONObject; -import com.rarchives.ripme.ripper.AlbumRipper; +import com.rarchives.ripme.ripper.AbstractJSONRipper; import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Utils; -public class GonewildRipper extends AlbumRipper { +public class GonewildRipper extends AbstractJSONRipper { - private static final String HOST = "gonewild"; - private static final int SLEEP_TIME = 1000; - + private static final int count = 50; + private int startIndex = 0; private static String API_DOMAIN; private String username; @@ -26,6 +27,15 @@ public class GonewildRipper extends AlbumRipper { API_DOMAIN = Utils.getConfigString("gw.api", "gonewild"); } + @Override + public String getHost() { + return "gonewild"; + } + @Override + public String getDomain() { + return "gonewild.com"; + } + @Override public boolean canRip(URL url) { return getUsernameMatcher(url).matches(); @@ -36,72 +46,64 @@ public class GonewildRipper extends AlbumRipper { return p.matcher(url.toExternalForm()); } - @Override - public URL sanitizeURL(URL url) throws MalformedURLException { - return url; - } - - @Override - public void rip() throws IOException { - int start = 0, - count = 50; - String baseGwURL = "http://" + API_DOMAIN + ".rarchives.com/api.cgi" - + "?method=get_user" - + "&user=" + username - + "&count=" + count; - String gwURL, imagePath; - JSONArray posts, images; - JSONObject json, post, image; - while (true) { - logger.info(" Retrieving posts by " + username); - gwURL = baseGwURL - + "&start=" + start; - start += count; - json = Http.url(gwURL) - .getJSON(); - if (json.has("error")) { - logger.error("Error while retrieving user posts:" + json.getString("error")); - break; - } - posts = json.getJSONArray("posts"); - if (posts.length() == 0) { - break; // No more posts to get - } - for (int i = 0; i < posts.length(); i++) { - post = (JSONObject) posts.get(i); - images = post.getJSONArray("images"); - for (int j = 0; j < images.length(); j++) { - image = (JSONObject) images.get(j); - imagePath = image.getString("path"); - if (imagePath.startsWith("..")) { - imagePath = imagePath.substring(2); - } - imagePath = "http://" + API_DOMAIN + ".rarchives.com" + imagePath; - logger.info(" Found file: " + imagePath); - addURLToDownload(new URL(imagePath)); - } - } - try { - Thread.sleep(SLEEP_TIME); - } catch (InterruptedException e) { - logger.error("[!] Interrupted while waiting to load more posts", e); - break; - } - } - waitForThreads(); - } - - @Override - public String getHost() { - return HOST; - } - @Override public String getGID(URL url) throws MalformedURLException { Matcher m = getUsernameMatcher(url); if (m.matches()) { this.username = m.group(m.groupCount()); } + else { + throw new MalformedURLException("Expected format: gonewild.com/"); + } return username; } + + @Override + public JSONObject getFirstPage() throws IOException { + String gwURL = "http://" + API_DOMAIN + ".rarchives.com/api.cgi" + + "?method=get_user" + + "&user=" + username + + "&count=" + count + + "&start=" + startIndex; + JSONObject nextJSON = Http.url(gwURL).getJSON(); + if (nextJSON.has("error")) { + throw new IOException(nextJSON.getString("error")); + } + if (nextJSON.getJSONArray("posts").length() == 0) { + throw new IOException("No posts found"); + } + return nextJSON; + } + + @Override + public JSONObject getNextPage(JSONObject json) throws IOException { + startIndex += count; + sleep(1000); + return getFirstPage(); + } + + @Override + public List getURLsFromJSON(JSONObject json) { + List imageURLs = new ArrayList(); + JSONArray posts = json.getJSONArray("posts"); + for (int i = 0; i < posts.length(); i++) { + JSONObject post = posts.getJSONObject(i); + JSONArray images = post.getJSONArray("images"); + for (int j = 0; j < images.length(); j++) { + JSONObject image = images.getJSONObject(j); + String imagePath = image.getString("path"); + if (imagePath.startsWith("..")) { + imagePath = imagePath.substring(2); + } + imagePath = "http://" + API_DOMAIN + ".rarchives.com" + imagePath; + imageURLs.add(imagePath); + } + } + return imageURLs; + } + + @Override + public void downloadURL(URL url, int index) { + addURLToDownload(url, getPrefix(index)); + } }