From c17a4d48551f8bdb16ec679e91c1913d6846f095 Mon Sep 17 00:00:00 2001 From: buzzlightmonth <44553885+buzzlightmonth@users.noreply.github.com> Date: Wed, 23 Jan 2019 16:37:37 +0100 Subject: [PATCH 1/3] Removed unused imports of AlbumRipper --- .../com/rarchives/ripme/ripper/rippers/NewsfilterRipper.java | 4 ---- .../java/com/rarchives/ripme/ripper/rippers/NfsfwRipper.java | 3 --- .../com/rarchives/ripme/ripper/rippers/PornhubRipper.java | 2 -- 3 files changed, 9 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/NewsfilterRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/NewsfilterRipper.java index eee733db..bafa3690 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/NewsfilterRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/NewsfilterRipper.java @@ -10,14 +10,10 @@ import java.util.regex.Pattern; import com.rarchives.ripme.ripper.AbstractHTMLRipper; import com.rarchives.ripme.utils.Http; -import org.jsoup.Connection; -import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; -import com.rarchives.ripme.ripper.AlbumRipper; - public class NewsfilterRipper extends AbstractHTMLRipper { private static final String HOST = "newsfilter"; diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/NfsfwRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/NfsfwRipper.java index b525a39a..86079edc 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/NfsfwRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/NfsfwRipper.java @@ -13,11 +13,8 @@ import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; -import com.rarchives.ripme.ripper.AlbumRipper; import com.rarchives.ripme.ripper.DownloadThreadPool; -import com.rarchives.ripme.ui.RipStatusMessage.STATUS; import com.rarchives.ripme.utils.Http; -import com.rarchives.ripme.utils.Utils; public class NfsfwRipper extends AbstractHTMLRipper { diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/PornhubRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/PornhubRipper.java index eb7a421b..197bdcbd 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/PornhubRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/PornhubRipper.java @@ -14,9 +14,7 @@ import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; -import com.rarchives.ripme.ripper.AlbumRipper; import com.rarchives.ripme.ripper.DownloadThreadPool; -import com.rarchives.ripme.ui.RipStatusMessage.STATUS; import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Utils; From 3b22af3cda5b31d433e8e6451ad1c5091783ee1b Mon Sep 17 00:00:00 2001 From: buzzlightmonth <44553885+buzzlightmonth@users.noreply.github.com> Date: Wed, 23 Jan 2019 17:54:13 +0100 Subject: [PATCH 2/3] Removed references of AlbumRipper in the MotherlessRipper --- .../ripper/rippers/MotherlessRipper.java | 84 ++++++++++++------- 1 file changed, 56 insertions(+), 28 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/MotherlessRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/MotherlessRipper.java index b2fee8e5..9b71d756 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/MotherlessRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/MotherlessRipper.java @@ -3,19 +3,21 @@ package com.rarchives.ripme.ripper.rippers; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; +import java.util.ArrayList; +import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; -import com.rarchives.ripme.ripper.AlbumRipper; +import com.rarchives.ripme.ripper.AbstractHTMLRipper; import com.rarchives.ripme.ripper.DownloadThreadPool; import com.rarchives.ripme.ui.RipStatusMessage.STATUS; import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Utils; -public class MotherlessRipper extends AlbumRipper { +public class MotherlessRipper extends AbstractHTMLRipper { private static final String DOMAIN = "motherless.com", HOST = "motherless"; @@ -37,6 +39,52 @@ public class MotherlessRipper extends AlbumRipper { return url.getHost().endsWith(DOMAIN); } + @Override + protected String getDomain() { + return DOMAIN; + } + + @Override + protected Document getFirstPage() throws IOException { + return Http.url(url).referrer("http://motherless.com").get(); + } + + @Override + protected List getURLsFromPage(Document page) { + List pageURLs = new ArrayList<>(); + + for (Element thumb : page.select("div.thumb a.img-container")) { + if (isStopped()) { + break; + } + String thumbURL = thumb.attr("href"); + if (thumbURL.contains("pornmd.com")) { + continue; + } + + String url; + if (!thumbURL.startsWith("http")) { + url = "http://" + DOMAIN + thumbURL; + } else { + url = thumbURL; + } + pageURLs.add(url); + + if (isThisATest()) { + break; + } + } + + return pageURLs; + } + + @Override + protected void downloadURL(URL url, int index) { + // Create thread for finding image at "url" page + MotherlessImageThread mit = new MotherlessImageThread(url, index); + motherlessThreadPool.addThread(mit); + } + @Override public String getHost() { return HOST; @@ -77,34 +125,14 @@ public class MotherlessRipper extends AlbumRipper { } LOGGER.info("Retrieving " + nextURL); sendUpdate(STATUS.LOADING_RESOURCE, nextURL); - Document doc = Http.url(nextURL) - .referrer("http://motherless.com") - .get(); - for (Element thumb : doc.select("div.thumb a.img-container")) { - if (isStopped()) { - break; - } - String thumbURL = thumb.attr("href"); - if (thumbURL.contains("pornmd.com")) { - continue; - } - URL url; - if (!thumbURL.startsWith("http")) { - url = new URL("http://" + DOMAIN + thumbURL); - } - else { - url = new URL(thumbURL); - } - index += 1; + Document doc = getFirstPage(); + List URLs = getURLsFromPage(doc); - // Create thread for finding image at "url" page - MotherlessImageThread mit = new MotherlessImageThread(url, index); - motherlessThreadPool.addThread(mit); - - if (isThisATest()) { - break; - } + for (String url: URLs) { + downloadURL(new URL(url), index); + index ++; } + if (isThisATest()) { break; } From e84b776b4e2241ead783ab165ce543cf06ab4e4d Mon Sep 17 00:00:00 2001 From: buzzlightmonth <44553885+buzzlightmonth@users.noreply.github.com> Date: Wed, 23 Jan 2019 18:55:03 +0100 Subject: [PATCH 3/3] [untested] Removed references in VkRipper --- .../ripme/ripper/rippers/VkRipper.java | 269 ++++++++++-------- 1 file changed, 153 insertions(+), 116 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/VkRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/VkRipper.java index b2472cc9..99310dc4 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/VkRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/VkRipper.java @@ -3,33 +3,171 @@ package com.rarchives.ripme.ripper.rippers; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; +import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; +import com.rarchives.ripme.ripper.AbstractJSONRipper; import org.json.JSONArray; import org.json.JSONObject; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; -import com.rarchives.ripme.ripper.AlbumRipper; import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Utils; -public class VkRipper extends AlbumRipper { +public class VkRipper extends AbstractJSONRipper { private static final String DOMAIN = "vk.com", HOST = "vk"; + enum RipType { VIDEO, IMAGE } + + private RipType RIP_TYPE; + private String oid; + public VkRipper(URL url) throws IOException { super(url); } + @Override + public String getHost() { + return HOST; + } + + @Override + protected String getDomain() { + return DOMAIN; + } + + @Override + protected JSONObject getFirstPage() throws IOException { + if (RIP_TYPE == RipType.VIDEO) { + oid = getGID(this.url).replace("videos", ""); + String u = "http://vk.com/al_video.php"; + Map postData = new HashMap<>(); + postData.put("al", "1"); + postData.put("act", "load_videos_silent"); + postData.put("offset", "0"); + postData.put("oid", oid); + Document doc = Http.url(u) + .referrer(this.url) + .ignoreContentType() + .data(postData) + .post(); + String[] jsonStrings = doc.toString().split(""); + return new JSONObject(jsonStrings[jsonStrings.length - 1]); + } else { + Map photoIDsToURLs = new HashMap<>(); + int offset = 0; + while (true) { + LOGGER.info(" Retrieving " + this.url); + Map postData = new HashMap<>(); + postData.put("al", "1"); + postData.put("offset", Integer.toString(offset)); + postData.put("part", "1"); + Document doc = Http.url(this.url) + .referrer(this.url) + .ignoreContentType() + .data(postData) + .post(); + + String body = doc.toString(); + if (!body.contains(" elements = doc.select("a"); + Set photoIDsToGet = new HashSet<>(); + for (Element a : elements) { + if (!a.attr("onclick").contains("showPhoto('")) { + LOGGER.error("a: " + a); + continue; + } + String photoID = a.attr("onclick"); + photoID = photoID.substring(photoID.indexOf("showPhoto('") + "showPhoto('".length()); + photoID = photoID.substring(0, photoID.indexOf("'")); + if (!photoIDsToGet.contains(photoID)) { + photoIDsToGet.add(photoID); + } + } + for (String photoID : photoIDsToGet) { + if (!photoIDsToURLs.containsKey(photoID)) { + try { + photoIDsToURLs.putAll(getPhotoIDsToURLs(photoID)); + } catch (IOException e) { + LOGGER.error("Exception while retrieving photo id " + photoID, e); + continue; + } + } + if (!photoIDsToURLs.containsKey(photoID)) { + LOGGER.error("Could not find URL for photo ID: " + photoID); + continue; + } + if (isStopped() || isThisATest()) { + break; + } + } + + if (elements.size() < 40 || isStopped() || isThisATest()) { + break; + } + offset += elements.size(); + } + // Slight hack to make this into effectively a JSON ripper + return new JSONObject(photoIDsToURLs); + } + } + + @Override + protected List getURLsFromJSON(JSONObject page) { + List pageURLs = new ArrayList<>(); + if (RIP_TYPE == RipType.VIDEO) { + JSONArray videos = page.getJSONArray("all"); + LOGGER.info("Found " + videos.length() + " videos"); + + for (int i = 0; i < videos.length(); i++) { + JSONArray jsonVideo = videos.getJSONArray(i); + int vidid = jsonVideo.getInt(1); + String videoURL; + try { + videoURL = com.rarchives.ripme.ripper.rippers.video.VkRipper.getVideoURLAtPage( + "http://vk.com/video" + oid + "_" + vidid); + } catch (IOException e) { + LOGGER.error("Error while ripping video id: " + vidid); + return pageURLs; + } + pageURLs.add(videoURL); + } + } else { + Iterator keys = page.keys(); + while (keys.hasNext()) { + pageURLs.add(page.getString((String) keys.next())); + } + } + return pageURLs; + } + + @Override + protected void downloadURL(URL url, int index) { + if (RIP_TYPE == RipType.VIDEO) { + String prefix = ""; + if (Utils.getConfigBoolean("download.save_order", true)) { + prefix = String.format("%03d_", index + 1); + } + addURLToDownload(url, prefix); + try { + Thread.sleep(500); + } catch (InterruptedException e) { + LOGGER.error("Interrupted while waiting to fetch next video URL", e); + } + } else { + addURLToDownload(url); + } + } + @Override public boolean canRip(URL url) { if (!url.getHost().endsWith(DOMAIN)) { @@ -48,115 +186,19 @@ public class VkRipper extends AlbumRipper { @Override public void rip() throws IOException { if (this.url.toExternalForm().contains("/videos")) { - ripVideos(); + RIP_TYPE = RipType.VIDEO; + JSONObject json = getFirstPage(); + List URLs = getURLsFromJSON(json); + for (int index = 0; index < URLs.size(); index ++) { + downloadURL(new URL(URLs.get(index)), index); + } + waitForThreads(); } else { - ripImages(); + RIP_TYPE = RipType.IMAGE; } } - private void ripVideos() throws IOException { - String oid = getGID(this.url).replace("videos", ""); - String u = "http://vk.com/al_video.php"; - Map postData = new HashMap<>(); - postData.put("al", "1"); - postData.put("act", "load_videos_silent"); - postData.put("offset", "0"); - postData.put("oid", oid); - Document doc = Http.url(u) - .referrer(this.url) - .ignoreContentType() - .data(postData) - .post(); - String[] jsonStrings = doc.toString().split(""); - JSONObject json = new JSONObject(jsonStrings[jsonStrings.length - 1]); - JSONArray videos = json.getJSONArray("all"); - LOGGER.info("Found " + videos.length() + " videos"); - for (int i = 0; i < videos.length(); i++) { - JSONArray jsonVideo = videos.getJSONArray(i); - int vidid = jsonVideo.getInt(1); - String videoURL = com.rarchives.ripme.ripper.rippers.video.VkRipper.getVideoURLAtPage( - "http://vk.com/video" + oid + "_" + vidid); - String prefix = ""; - if (Utils.getConfigBoolean("download.save_order", true)) { - prefix = String.format("%03d_", i + 1); - } - addURLToDownload(new URL(videoURL), prefix); - try { - Thread.sleep(500); - } catch (InterruptedException e) { - LOGGER.error("Interrupted while waiting to fetch next video URL", e); - break; - } - } - waitForThreads(); - } - - private void ripImages() throws IOException { - Map photoIDsToURLs = new HashMap<>(); - int offset = 0; - while (true) { - LOGGER.info(" Retrieving " + this.url); - - // al=1&offset=80&part=1 - Map postData = new HashMap<>(); - postData.put("al", "1"); - postData.put("offset", Integer.toString(offset)); - postData.put("part", "1"); - Document doc = Http.url(this.url) - .referrer(this.url) - .ignoreContentType() - .data(postData) - .post(); - - String body = doc.toString(); - if (!body.contains(" elements = doc.select("a"); - Set photoIDsToGet = new HashSet<>(); - for (Element a : elements) { - if (!a.attr("onclick").contains("showPhoto('")) { - LOGGER.error("a: " + a); - continue; - } - String photoID = a.attr("onclick"); - photoID = photoID.substring(photoID.indexOf("showPhoto('") + "showPhoto('".length()); - photoID = photoID.substring(0, photoID.indexOf("'")); - if (!photoIDsToGet.contains(photoID)) { - photoIDsToGet.add(photoID); - } - } - for (String photoID : photoIDsToGet) { - if (!photoIDsToURLs.containsKey(photoID)) { - try { - photoIDsToURLs.putAll(getPhotoIDsToURLs(photoID)); - } catch (IOException e) { - LOGGER.error("Exception while retrieving photo id " + photoID, e); - continue; - } - } - if (!photoIDsToURLs.containsKey(photoID)) { - LOGGER.error("Could not find URL for photo ID: " + photoID); - continue; - } - String url = photoIDsToURLs.get(photoID); - addURLToDownload(new URL(url)); - if (isStopped() || isThisATest()) { - break; - } - } - - if (elements.size() < 40 || isStopped() || isThisATest()) { - break; - } - offset += elements.size(); - } - waitForThreads(); - } - private Map getPhotoIDsToURLs(String photoID) throws IOException { Map photoIDsToURLs = new HashMap<>(); Map postData = new HashMap<>(); @@ -191,11 +233,6 @@ public class VkRipper extends AlbumRipper { return photoIDsToURLs; } - @Override - public String getHost() { - return HOST; - } - @Override public String getGID(URL url) throws MalformedURLException { Pattern p = Pattern.compile("^https?://(www\\.)?vk\\.com/(photos|album|videos)-?([a-zA-Z0-9_]+).*$");