diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/VkRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/VkRipper.java index b2472cc9..99310dc4 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/VkRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/VkRipper.java @@ -3,33 +3,171 @@ package com.rarchives.ripme.ripper.rippers; import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; +import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; +import com.rarchives.ripme.ripper.AbstractJSONRipper; import org.json.JSONArray; import org.json.JSONObject; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; -import com.rarchives.ripme.ripper.AlbumRipper; import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Utils; -public class VkRipper extends AlbumRipper { +public class VkRipper extends AbstractJSONRipper { private static final String DOMAIN = "vk.com", HOST = "vk"; + enum RipType { VIDEO, IMAGE } + + private RipType RIP_TYPE; + private String oid; + public VkRipper(URL url) throws IOException { super(url); } + @Override + public String getHost() { + return HOST; + } + + @Override + protected String getDomain() { + return DOMAIN; + } + + @Override + protected JSONObject getFirstPage() throws IOException { + if (RIP_TYPE == RipType.VIDEO) { + oid = getGID(this.url).replace("videos", ""); + String u = "http://vk.com/al_video.php"; + Map postData = new HashMap<>(); + postData.put("al", "1"); + postData.put("act", "load_videos_silent"); + postData.put("offset", "0"); + postData.put("oid", oid); + Document doc = Http.url(u) + .referrer(this.url) + .ignoreContentType() + .data(postData) + .post(); + String[] jsonStrings = doc.toString().split(""); + return new JSONObject(jsonStrings[jsonStrings.length - 1]); + } else { + Map photoIDsToURLs = new HashMap<>(); + int offset = 0; + while (true) { + LOGGER.info(" Retrieving " + this.url); + Map postData = new HashMap<>(); + postData.put("al", "1"); + postData.put("offset", Integer.toString(offset)); + postData.put("part", "1"); + Document doc = Http.url(this.url) + .referrer(this.url) + .ignoreContentType() + .data(postData) + .post(); + + String body = doc.toString(); + if (!body.contains(" elements = doc.select("a"); + Set photoIDsToGet = new HashSet<>(); + for (Element a : elements) { + if (!a.attr("onclick").contains("showPhoto('")) { + LOGGER.error("a: " + a); + continue; + } + String photoID = a.attr("onclick"); + photoID = photoID.substring(photoID.indexOf("showPhoto('") + "showPhoto('".length()); + photoID = photoID.substring(0, photoID.indexOf("'")); + if (!photoIDsToGet.contains(photoID)) { + photoIDsToGet.add(photoID); + } + } + for (String photoID : photoIDsToGet) { + if (!photoIDsToURLs.containsKey(photoID)) { + try { + photoIDsToURLs.putAll(getPhotoIDsToURLs(photoID)); + } catch (IOException e) { + LOGGER.error("Exception while retrieving photo id " + photoID, e); + continue; + } + } + if (!photoIDsToURLs.containsKey(photoID)) { + LOGGER.error("Could not find URL for photo ID: " + photoID); + continue; + } + if (isStopped() || isThisATest()) { + break; + } + } + + if (elements.size() < 40 || isStopped() || isThisATest()) { + break; + } + offset += elements.size(); + } + // Slight hack to make this into effectively a JSON ripper + return new JSONObject(photoIDsToURLs); + } + } + + @Override + protected List getURLsFromJSON(JSONObject page) { + List pageURLs = new ArrayList<>(); + if (RIP_TYPE == RipType.VIDEO) { + JSONArray videos = page.getJSONArray("all"); + LOGGER.info("Found " + videos.length() + " videos"); + + for (int i = 0; i < videos.length(); i++) { + JSONArray jsonVideo = videos.getJSONArray(i); + int vidid = jsonVideo.getInt(1); + String videoURL; + try { + videoURL = com.rarchives.ripme.ripper.rippers.video.VkRipper.getVideoURLAtPage( + "http://vk.com/video" + oid + "_" + vidid); + } catch (IOException e) { + LOGGER.error("Error while ripping video id: " + vidid); + return pageURLs; + } + pageURLs.add(videoURL); + } + } else { + Iterator keys = page.keys(); + while (keys.hasNext()) { + pageURLs.add(page.getString((String) keys.next())); + } + } + return pageURLs; + } + + @Override + protected void downloadURL(URL url, int index) { + if (RIP_TYPE == RipType.VIDEO) { + String prefix = ""; + if (Utils.getConfigBoolean("download.save_order", true)) { + prefix = String.format("%03d_", index + 1); + } + addURLToDownload(url, prefix); + try { + Thread.sleep(500); + } catch (InterruptedException e) { + LOGGER.error("Interrupted while waiting to fetch next video URL", e); + } + } else { + addURLToDownload(url); + } + } + @Override public boolean canRip(URL url) { if (!url.getHost().endsWith(DOMAIN)) { @@ -48,115 +186,19 @@ public class VkRipper extends AlbumRipper { @Override public void rip() throws IOException { if (this.url.toExternalForm().contains("/videos")) { - ripVideos(); + RIP_TYPE = RipType.VIDEO; + JSONObject json = getFirstPage(); + List URLs = getURLsFromJSON(json); + for (int index = 0; index < URLs.size(); index ++) { + downloadURL(new URL(URLs.get(index)), index); + } + waitForThreads(); } else { - ripImages(); + RIP_TYPE = RipType.IMAGE; } } - private void ripVideos() throws IOException { - String oid = getGID(this.url).replace("videos", ""); - String u = "http://vk.com/al_video.php"; - Map postData = new HashMap<>(); - postData.put("al", "1"); - postData.put("act", "load_videos_silent"); - postData.put("offset", "0"); - postData.put("oid", oid); - Document doc = Http.url(u) - .referrer(this.url) - .ignoreContentType() - .data(postData) - .post(); - String[] jsonStrings = doc.toString().split(""); - JSONObject json = new JSONObject(jsonStrings[jsonStrings.length - 1]); - JSONArray videos = json.getJSONArray("all"); - LOGGER.info("Found " + videos.length() + " videos"); - for (int i = 0; i < videos.length(); i++) { - JSONArray jsonVideo = videos.getJSONArray(i); - int vidid = jsonVideo.getInt(1); - String videoURL = com.rarchives.ripme.ripper.rippers.video.VkRipper.getVideoURLAtPage( - "http://vk.com/video" + oid + "_" + vidid); - String prefix = ""; - if (Utils.getConfigBoolean("download.save_order", true)) { - prefix = String.format("%03d_", i + 1); - } - addURLToDownload(new URL(videoURL), prefix); - try { - Thread.sleep(500); - } catch (InterruptedException e) { - LOGGER.error("Interrupted while waiting to fetch next video URL", e); - break; - } - } - waitForThreads(); - } - - private void ripImages() throws IOException { - Map photoIDsToURLs = new HashMap<>(); - int offset = 0; - while (true) { - LOGGER.info(" Retrieving " + this.url); - - // al=1&offset=80&part=1 - Map postData = new HashMap<>(); - postData.put("al", "1"); - postData.put("offset", Integer.toString(offset)); - postData.put("part", "1"); - Document doc = Http.url(this.url) - .referrer(this.url) - .ignoreContentType() - .data(postData) - .post(); - - String body = doc.toString(); - if (!body.contains(" elements = doc.select("a"); - Set photoIDsToGet = new HashSet<>(); - for (Element a : elements) { - if (!a.attr("onclick").contains("showPhoto('")) { - LOGGER.error("a: " + a); - continue; - } - String photoID = a.attr("onclick"); - photoID = photoID.substring(photoID.indexOf("showPhoto('") + "showPhoto('".length()); - photoID = photoID.substring(0, photoID.indexOf("'")); - if (!photoIDsToGet.contains(photoID)) { - photoIDsToGet.add(photoID); - } - } - for (String photoID : photoIDsToGet) { - if (!photoIDsToURLs.containsKey(photoID)) { - try { - photoIDsToURLs.putAll(getPhotoIDsToURLs(photoID)); - } catch (IOException e) { - LOGGER.error("Exception while retrieving photo id " + photoID, e); - continue; - } - } - if (!photoIDsToURLs.containsKey(photoID)) { - LOGGER.error("Could not find URL for photo ID: " + photoID); - continue; - } - String url = photoIDsToURLs.get(photoID); - addURLToDownload(new URL(url)); - if (isStopped() || isThisATest()) { - break; - } - } - - if (elements.size() < 40 || isStopped() || isThisATest()) { - break; - } - offset += elements.size(); - } - waitForThreads(); - } - private Map getPhotoIDsToURLs(String photoID) throws IOException { Map photoIDsToURLs = new HashMap<>(); Map postData = new HashMap<>(); @@ -191,11 +233,6 @@ public class VkRipper extends AlbumRipper { return photoIDsToURLs; } - @Override - public String getHost() { - return HOST; - } - @Override public String getGID(URL url) throws MalformedURLException { Pattern p = Pattern.compile("^https?://(www\\.)?vk\\.com/(photos|album|videos)-?([a-zA-Z0-9_]+).*$");