From 7b57d3cbfd5d45bfd8a79cea12fb2f118f07346b Mon Sep 17 00:00:00 2001 From: MrPlaygon <-> Date: Sat, 16 Feb 2019 18:42:46 +0100 Subject: [PATCH 1/9] Refactoring of DeviantartRipper. It is now a HTML Ripper. NOT using Deviantart API like the old JSON ripper because it is SLOW and somehow annoying to use. Things to consider: Using the API might be less work/maintenance later because APIs do not change as frequently as HTML source code...? --- .../ripper/rippers/DeviantartRipper.java | 831 ++++++++++-------- .../ripper/rippers/DeviantartRipperTest.java | 5 +- 2 files changed, 484 insertions(+), 352 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java index ad7d79fa..a81c0656 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java @@ -1,409 +1,540 @@ package com.rarchives.ripme.ripper.rippers; -import com.rarchives.ripme.ripper.AbstractJSONRipper; -import com.rarchives.ripme.utils.Base64; +import com.rarchives.ripme.ripper.AbstractHTMLRipper; +import com.rarchives.ripme.ripper.DownloadThreadPool; +import com.rarchives.ripme.ui.RipStatusMessage.STATUS; import com.rarchives.ripme.utils.Http; -import com.rarchives.ripme.utils.RipUtils; -import com.rarchives.ripme.utils.Utils; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; import java.io.IOException; -import java.net.HttpURLConnection; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; -import java.util.Arrays; import java.util.HashMap; -import java.util.HashSet; import java.util.List; import java.util.Map; -import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; -import org.json.JSONArray; -import org.json.JSONObject; 
+import org.jsoup.Connection.Method; import org.jsoup.Connection.Response; -import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; +/** + * + * @author MrPlaygon + * + * NOT using Deviantart API like the old JSON ripper because it is SLOW + * and somehow annoying to use. + * + * + * Things to consider: Using the API might be less work/maintenance later because APIs do not change as frequently as HTML source code...? + * + * + * + * Tested for: + * + * SFW: + * + * https://www.deviantart.com/apofiss/gallery/41388863/sceneries + * https://www.deviantart.com/kageuri/gallery/ + * https://www.deviantart.com/kageuri/gallery/?catpath=/ + * https://www.deviantart.com/apofiss/favourites/39881418/gifts-and + * https://www.deviantart.com/kageuri/favourites/ + * https://www.deviantart.com/kageuri/favourites/?catpath=/ + * + * NSFW: + * + * https://www.deviantart.com/revpeng/gallery/67734353/Siren-Lee-Agent-of-S-I-R-E-N-S + * + * + * + * Login Data (PLEASE DONT ACTUALLY USE!!!): + * + * email: 5g5_8l4dii5lbbpc@byom.de + * + * username: 5g58l4dii5lbbpc + * + * password: 5g5_8l4dii5lbbpc + * + * + * + */ +public class DeviantartRipper extends AbstractHTMLRipper { -public class DeviantartRipper extends AbstractJSONRipper { - String requestID; - String galleryID; - String username; - String baseApiUrl = "https://www.deviantart.com/dapi/v1/gallery/"; - String csrf; - Map pageCookies = new HashMap<>(); + private final String username = "5g58l4dii5lbbpc"; + private final String password = "5g5_8l4dii5lbbpc"; + private int offset = 0; + private boolean usingCatPath = false; + private int downloadCount = 0; + private Map cookies; + private DownloadThreadPool deviantartThreadPool = new DownloadThreadPool("deviantart"); + private ArrayList names = new ArrayList(); - private static final int PAGE_SLEEP_TIME = 3000, - IMAGE_SLEEP_TIME = 2000; + @Override + public DownloadThreadPool getThreadPool() { + return 
deviantartThreadPool; + } - private Map cookies = new HashMap<>(); - private Set triedURLs = new HashSet<>(); + public DeviantartRipper(URL url) throws IOException { + super(url); + } - public DeviantartRipper(URL url) throws IOException { - super(url); - } + @Override + protected String getDomain() { + return "deviantart.com"; + } - String loginCookies = "auth=__0f9158aaec09f417b235%3B%221ff79836392a515d154216d919eae573%22;" + - "auth_secure=__41d14dd0da101f411bb0%3B%2281cf2cf9477776162a1172543aae85ce%22;" + - "userinfo=__bf84ac233bfa8ae642e8%3B%7B%22username%22%3A%22grabpy%22%2C%22uniqueid%22%3A%22a0a876aa37dbd4b30e1c80406ee9c280%22%2C%22vd%22%3A%22BbHUXZ%2CBbHUXZ%2CA%2CU%2CA%2C%2CB%2CA%2CB%2CBbHUXZ%2CBbHUdj%2CL%2CL%2CA%2CBbHUdj%2C13%2CA%2CB%2CA%2C%2CA%2CA%2CB%2CA%2CA%2C%2CA%22%2C%22attr%22%3A56%7D"; + @Override + public String getHost() { + return "deviantart"; + } - @Override - public String getHost() { - return "deviantart"; - } + @Override + protected Document getFirstPage() throws IOException { + login(); + return Http.url(urlWithParams(this.offset)).cookies(getDACookie()).referrer("https://www.deviantart.com/") + .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0").get(); + } - @Override - public String getDomain() { - return "deviantart.com"; - } + /** + * Stores logged in Cookies. Needed for art pieces only visible to logged in + * users. 
+ * + * + * @throws IOException when failed to load webpage or failed to read/write + * cookies in file (used when running multiple instances of + * RipMe) + */ + private void login() throws IOException { - @Override - public URL sanitizeURL(URL url) throws MalformedURLException { - String u = url.toExternalForm(); - if (u.contains("/gallery/")) { - return url; - } else if (u.contains("/favourites")) { - return url; - } else if (u.contains("/favorites")) { - return url; - } + File f = new File("DACookie.toDelete"); + if (!f.exists()) { + f.createNewFile(); + f.deleteOnExit(); - if (!u.endsWith("/gallery/") && !u.endsWith("/gallery")) { - if (!u.endsWith("/")) { - u += "/gallery/"; - } else { - u += "gallery/"; - } - } + // Load login page + Response res = Http.url("https://www.deviantart.com/users/login").connection().method(Method.GET) + .referrer("https://www.deviantart.com/") + .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0") + .execute(); + // Find tokens + Document doc = res.parse(); + Element form = doc.getElementById("login"); + String token = form.select("input[name=\"validate_token\"]").first().attr("value"); + String key = form.select("input[name=\"validate_key\"]").first().attr("value"); + System.out.println( + "------------------------------" + token + " & " + key + "------------------------------"); - Pattern p = Pattern.compile("^https?://www\\.deviantart\\.com/([a-zA-Z0-9\\-]+)/favou?rites/([0-9]+)/*?$"); - Matcher m = p.matcher(url.toExternalForm()); - if (!m.matches()) { - String subdir = "/"; - if (u.contains("catpath=scraps")) { - subdir = "scraps"; - } - u = u.replaceAll("\\?.*", "?catpath=" + subdir); - } - return new URL(u); - } + // Build Login Data + HashMap loginData = new HashMap(); + loginData.put("challenge", ""); + loginData.put("username", username); + loginData.put("password", password); + loginData.put("remember_me", "1"); + loginData.put("validate_token", token); + 
loginData.put("validate_key", key); + Map cookies = res.cookies(); - @Override - public String getGID(URL url) throws MalformedURLException { - Pattern p = Pattern.compile("^https?://www\\.deviantart\\.com/([a-zA-Z0-9\\-]+)(/gallery)?/?(\\?.*)?$"); - Matcher m = p.matcher(url.toExternalForm()); - if (m.matches()) { - // Root gallery - if (url.toExternalForm().contains("catpath=scraps")) { - return m.group(1) + "_scraps"; - } - else { - return m.group(1); - } - } - p = Pattern.compile("^https?://www\\.deviantart\\.com/([a-zA-Z0-9\\-]+)/gallery/([0-9]+).*$"); - m = p.matcher(url.toExternalForm()); - if (m.matches()) { - // Subgallery - return m.group(1) + "_" + m.group(2); - } - p = Pattern.compile("^https?://www\\.deviantart\\.com/([a-zA-Z0-9\\-]+)/favou?rites/([0-9]+)/.*?$"); - m = p.matcher(url.toExternalForm()); - if (m.matches()) { - return m.group(1) + "_faves_" + m.group(2); - } - p = Pattern.compile("^https?://www\\.deviantart\\.com/([a-zA-Z0-9\\-]+)/favou?rites/?$"); - m = p.matcher(url.toExternalForm()); - if (m.matches()) { - // Subgallery - return m.group(1) + "_faves"; - } - throw new MalformedURLException("Expected URL format: http://www.deviantart.com/username[/gallery/#####], got: " + url); - } + // Log in using data. 
Handle redirect + res = Http.url("https://www.deviantart.com/users/login").connection() + .referrer("https://www.deviantart.com/") + .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0") + .method(Method.POST).data(loginData).cookies(cookies).followRedirects(false).execute(); + this.cookies = res.cookies(); - private String getUsernameFromURL(String u) { - Pattern p = Pattern.compile("^https?://www\\.deviantart\\.com/([a-zA-Z0-9\\-]+)/gallery/?(\\S+)?"); - Matcher m = p.matcher(url.toExternalForm()); - if (m.matches()) { - return m.group(1); - } - return null; + res = Http.url(res.header("location")).connection().referrer("https://www.deviantart.com/") + .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0") + .method(Method.GET).cookies(cookies).followRedirects(false).execute(); - } + // Store cookies + updateCookie(res.cookies()); - private String getFullsizedNSFWImage(String pageURL) { - try { - Document doc = Http.url(pageURL).cookies(cookies).get(); - String imageToReturn = ""; - String[] d = doc.select("img").attr("srcset").split(","); + // Apply agegate + this.cookies.put("agegate_state", "1"); - String s = d[d.length -1].split(" ")[0]; - LOGGER.info("2:" + s); + // Write Cookie to file for other RipMe Instances + try { + FileOutputStream fileOut = new FileOutputStream(f); + ObjectOutputStream out = new ObjectOutputStream(fileOut); + out.writeObject(this.cookies); + out.close(); + fileOut.close(); + } catch (IOException i) { + i.printStackTrace(); + } - if (s == null || s.equals("")) { - LOGGER.error("Could not find full sized image at " + pageURL); - } - return s; - } catch (IOException e) { - LOGGER.error("Could not find full sized image at " + pageURL); - return null; - } - } + } else { - /** - * Gets first page. - * Will determine if login is supplied, - * if there is a login, then login and add that login cookies. - * Otherwise, just bypass the age gate with an anonymous flag. 
- * @return - * @throws IOException - */ - @Override - public JSONObject getFirstPage() throws IOException { - - // Base64 da login - // username: Z3JhYnB5 - // password: ZmFrZXJz + // When cookie file already exists (from another RipMe instance) + while (this.cookies == null) { + try { + Thread.sleep(2000); + FileInputStream fileIn = new FileInputStream(f); + ObjectInputStream in = new ObjectInputStream(fileIn); + this.cookies = (Map) in.readObject(); + in.close(); + fileIn.close(); + } catch (IOException | ClassNotFoundException | InterruptedException i) { + i.printStackTrace(); + } + } + } + System.out.println("------------------------------" + this.cookies + "------------------------------"); + } - cookies = getDACookies(); - if (cookies.isEmpty()) { - LOGGER.warn("Failed to get login cookies"); - cookies.put("agegate_state","1"); // Bypasses the age gate - } - cookies.put("agegate_state", "1"); - - Response res = Http.url(this.url) - .cookies(cookies) - .response(); - Document page = res.parse(); + /** + * Returns next page Document using offset. 
+ */ + @Override + public Document getNextPage(Document doc) throws IOException { + this.offset += 24; + Response re = Http.url(urlWithParams(this.offset)).cookies(getDACookie()) + .referrer("https://www.deviantart.com/") + .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0").response(); + updateCookie(re.cookies()); + Document docu = re.parse(); + Elements messages = docu.getElementsByClass("message"); + System.out.println("------------------------------Current Offset: " + this.offset + + " - More Pages?------------------------------"); - JSONObject firstPageJSON = getFirstPageJSON(page); - requestID = firstPageJSON.getJSONObject("dapx").getString("requestid"); - galleryID = getGalleryID(page); - username = getUsernameFromURL(url.toExternalForm()); - csrf = firstPageJSON.getString("csrf"); - pageCookies = res.cookies(); + if (messages.size() > 0) { - return requestPage(0, galleryID, username, requestID, csrf, pageCookies); - } + // if message exists -> last page + System.out.println("------------------------------Messages amount: " + messages.size() + + " - Next Page does not exists------------------------------"); + throw new IOException("No more pages"); + } - private JSONObject requestPage(int offset, String galleryID, String username, String requestID, String csfr, Map c) { - LOGGER.debug("offset: " + Integer.toString(offset)); - LOGGER.debug("galleryID: " + galleryID); - LOGGER.debug("username: " + username); - LOGGER.debug("requestID: " + requestID); - String url = baseApiUrl + galleryID + "?iid=" + requestID; - try { - Document doc = Http.url(url).cookies(c).data("username", username).data("offset", Integer.toString(offset)) - .data("limit", "24").data("_csrf", csfr).data("id", requestID) - .ignoreContentType().post(); - return new JSONObject(doc.body().text()); - } catch (IOException e) { - LOGGER.error("Got error trying to get page: " + e.getMessage()); - e.printStackTrace(); - return null; - } + return 
Http.url(urlWithParams(this.offset)).referrer("https://www.deviantart.com/") + .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0") + .cookies(getDACookie()).get(); + } - } + /** + * Returns list of Links to the Image pages. NOT links to fullsize image!!! e.g. + * https://www.deviantart.com/kageuri/art/RUBY-568396655 + */ + @Override + protected List getURLsFromPage(Document page) { - private JSONObject getFirstPageJSON(Document doc) { - for (Element js : doc.select("script")) { - if (js.html().contains("requestid")) { - String json = js.html().replaceAll("window.__initial_body_data=", "").replaceAll("\\);", "") - .replaceAll(";__wake\\(.+", ""); - JSONObject j = new JSONObject(json); - return j; - } - } - return null; - } + List result = new ArrayList(); - public String getGalleryID(Document doc) { - // If the url contains catpath we return 0 as the DA api will provide all galery images if you sent the - // gallery id to 0 - if (url.toExternalForm().contains("catpath=")) { - return "0"; - } - Pattern p = Pattern.compile("^https?://www\\.deviantart\\.com/[a-zA-Z0-9\\-]+/gallery/([0-9]+)/?\\S+"); - Matcher m = p.matcher(url.toExternalForm()); - if (m.matches()) { - return m.group(1); - } - for (Element el : doc.select("input[name=set]")) { - try { - String galleryID = el.attr("value"); - return galleryID; - } catch (NullPointerException e) { - continue; - } - } - LOGGER.error("Could not find gallery ID"); - return null; - } + Element div; + if (usingCatPath) { + div = page.getElementById("gmi-"); - public String getUsername(Document doc) { - return doc.select("meta[property=og:title]").attr("content") - .replaceAll("'s DeviantArt gallery", "").replaceAll("'s DeviantArt Gallery", ""); - } - + } else { + div = page.getElementsByClass("folderview-art").first().child(0); - @Override - public List getURLsFromJSON(JSONObject json) { - List imageURLs = new ArrayList<>(); - JSONArray results = 
json.getJSONObject("content").getJSONArray("results"); - for (int i = 0; i < results.length(); i++) { - Document doc = Jsoup.parseBodyFragment(results.getJSONObject(i).getString("html")); - if (doc.html().contains("ismature")) { - LOGGER.info("Downloading nsfw image"); - String nsfwImage = getFullsizedNSFWImage(doc.select("span").attr("href")); - if (nsfwImage != null && nsfwImage.startsWith("http")) { - imageURLs.add(nsfwImage); - } - } - try { - String imageURL = doc.select("span").first().attr("data-super-full-img"); - if (!imageURL.isEmpty() && imageURL.startsWith("http")) { - imageURLs.add(imageURL); - } - } catch (NullPointerException e) { - LOGGER.info(i + " does not contain any images"); - } + } + Elements links = div.select("a.torpedo-thumb-link"); - } - return imageURLs; - } + for (Element el : links) { + result.add(el.attr("href")); + } - @Override - public JSONObject getNextPage(JSONObject page) throws IOException { - boolean hasMore = page.getJSONObject("content").getBoolean("has_more"); - if (hasMore) { - return requestPage(page.getJSONObject("content").getInt("next_offset"), galleryID, username, requestID, csrf, pageCookies); - } + System.out.println("------------------------------Amount of Images on Page: " + result.size() + + "------------------------------"); + System.out.println("------------------------------" + page.location() + "------------------------------"); - throw new IOException("No more pages"); - } + return result; + } - @Override - public boolean keepSortOrder() { - // Don't keep sort order (do not add prefixes). 
- // Causes file duplication, as outlined in https://github.com/4pr0n/ripme/issues/113 - return false; - } + /** + * Starts new Thread to find download link + filename + filetype + */ + @Override + protected void downloadURL(URL url, int index) { + this.downloadCount += 1; + System.out.println("------------------------------Download URL Number " + this.downloadCount + + "------------------------------"); + System.out.println( + "------------------------------DAURL: " + url.toExternalForm() + "------------------------------"); + try { + Response re = Http.url(urlWithParams(this.offset)).cookies(getDACookie()) + .referrer("https://www.deviantart.com/") + .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0") + .response(); + updateCookie(re.cookies()); + } catch (IOException e) { + e.printStackTrace(); + } - @Override - public void downloadURL(URL url, int index) { - addURLToDownload(url, getPrefix(index), "", this.url.toExternalForm(), cookies); - sleep(IMAGE_SLEEP_TIME); - } + // Start Thread and add to pool. 
+ DeviantartImageThread t = new DeviantartImageThread(url); + deviantartThreadPool.addThread(t); - /** - * Tries to get full size image from thumbnail URL - * @param thumb Thumbnail URL - * @param throwException Whether or not to throw exception when full size image isn't found - * @return Full-size image URL - * @throws Exception If it can't find the full-size URL - */ - private static String thumbToFull(String thumb, boolean throwException) throws Exception { - thumb = thumb.replace("http://th", "http://fc"); - List fields = new ArrayList<>(Arrays.asList(thumb.split("/"))); - fields.remove(4); - if (!fields.get(4).equals("f") && throwException) { - // Not a full-size image - throw new Exception("Can't get full size image from " + thumb); - } - StringBuilder result = new StringBuilder(); - for (int i = 0; i < fields.size(); i++) { - if (i > 0) { - result.append("/"); - } - result.append(fields.get(i)); - } - return result.toString(); - } + } + @Override + public String normalizeUrl(String url) { + return (urlWithParams(this.offset).toExternalForm()); + } + /** + * Returns name of album. Album name consists of 3 words: - Artist (owner of + * gallery) - Type (gallery or favorites folder) - Name of the folder + * + * Returns artist_type_name + */ + @Override + public String getGID(URL url) throws MalformedURLException { - /** - * If largest resolution for image at 'thumb' is found, starts downloading - * and returns null. - * If it finds a larger resolution on another page, returns the image URL. - * @param thumb Thumbnail URL - * @param page Page the thumbnail is retrieved from - * @return Highest-resolution version of the image based on thumbnail URL and the page. 
- */ - private String smallToFull(String thumb, String page) { - try { - // Fetch the image page - Response resp = Http.url(page) - .referrer(this.url) - .cookies(cookies) - .response(); - cookies.putAll(resp.cookies()); - Document doc = resp.parse(); - Elements els = doc.select("img.dev-content-full"); - String fsimage = null; - // Get the largest resolution image on the page - if (!els.isEmpty()) { - // Large image - fsimage = els.get(0).attr("src"); - LOGGER.info("Found large-scale: " + fsimage); - if (fsimage.contains("//orig")) { - return fsimage; - } - } - // Try to find the download button - els = doc.select("a.dev-page-download"); - if (!els.isEmpty()) { - // Full-size image - String downloadLink = els.get(0).attr("href"); - LOGGER.info("Found download button link: " + downloadLink); - HttpURLConnection con = (HttpURLConnection) new URL(downloadLink).openConnection(); - con.setRequestProperty("Referer",this.url.toString()); - String cookieString = ""; - for (Map.Entry entry : cookies.entrySet()) { - cookieString = cookieString + entry.getKey() + "=" + entry.getValue() + "; "; - } - cookieString = cookieString.substring(0,cookieString.length() - 1); - con.setRequestProperty("Cookie",cookieString); - con.setRequestProperty("User-Agent", USER_AGENT); - con.setInstanceFollowRedirects(true); - con.connect(); - int code = con.getResponseCode(); - String location = con.getURL().toString(); - con.disconnect(); - if (location.contains("//orig")) { - fsimage = location; - LOGGER.info("Found image download: " + location); - } - } - if (fsimage != null) { - return fsimage; - } - throw new IOException("No download page found"); - } catch (IOException ioe) { - try { - LOGGER.info("Failed to get full size download image at " + page + " : '" + ioe.getMessage() + "'"); - String lessThanFull = thumbToFull(thumb, false); - LOGGER.info("Falling back to less-than-full-size image " + lessThanFull); - return lessThanFull; - } catch (Exception e) { - return null; - } - } - } + 
String s = url.toExternalForm(); + String artist = "unknown"; + String what = "unknown"; + String albumname = "unknown"; - /** - * Returns DA cookies. - * @return Map of cookies containing session data. - */ - private Map getDACookies() { - return RipUtils.getCookiesFromString(Utils.getConfigString("deviantart.cookies", loginCookies)); - } + if (url.toExternalForm().contains("catpath=/")) { + this.usingCatPath = true; + } + + Pattern p = Pattern.compile("^https?://www.deviantart\\.com/([a-zA-Z0-9]+).*$"); + Matcher m = p.matcher(s); + + // Artist + if (m.matches()) { + artist = m.group(1); + } else { + throw new MalformedURLException("Expected deviantart.com URL format: " + + "www.deviantart.com//gallery//\nOR\nwww.deviantart.com//favourites// - got " + + url + " instead"); + } + + // What is it + if (s.contains("/gallery/")) { + what = "gallery"; + } else if (s.contains("/favourites/")) { + what = "favourites"; + } else { + throw new MalformedURLException("Expected deviantart.com URL format: " + + "www.deviantart.com//gallery//\nOR\nwww.deviantart.com//favourites// - got " + + url + " instead"); + } + + // Album Name + Pattern artistP = Pattern + .compile("^https?://www.deviantart\\.com/[a-zA-Z0-9]+/[a-zA-Z]+/[0-9]+/([a-zA-Z0-9-]+).*$"); + Matcher artistM = artistP.matcher(s); + if (s.endsWith("?catpath=/")) { + albumname = "all"; + } else if (s.endsWith("/favourites/") || s.endsWith("/gallery/")) { + albumname = "featured"; + } else if (artistM.matches()) { + albumname = artistM.group(1); + } + System.out.println("------------------------------Album Name: " + artist + "_" + what + "_" + albumname + + "------------------------------"); + + return artist + "_" + what + "_" + albumname; + + } + + /** + * + * @return Clean URL as String + */ + private String cleanURL() { + return (this.url.toExternalForm().split("\\?"))[0]; + } + + /** + * Return correct url with params (catpath) and current offset + * + * @return URL to page with offset + */ + private URL 
urlWithParams(int offset) { + try { + String url = cleanURL(); + if (this.usingCatPath) { + return (new URL(url + "?catpath=/&offset=" + offset)); + } else { + return (new URL(url + "?offset=" + offset)); + } + } catch (MalformedURLException e) { + e.printStackTrace(); + } + return null; + } + + /** + * Returns Hashmap usable as Cookie for NSFW Artworks Not really needed but + * maybe useful later. + * + * @return Cookie Hashmap + */ + private Map getDACookie() { + return this.cookies; + } + + private void updateCookie(Map m) { + + System.out.println("------------------------------Updating Cookies------------------------------"); + System.out.println( + "------------------------------Old Cookies: " + this.cookies + " ------------------------------"); + System.out.println("------------------------------New Cookies: " + m + " ------------------------------"); + this.cookies.putAll(m); + this.cookies.put("agegate_state", "1"); + System.out.println( + "------------------------------Merged Cookies: " + this.cookies + " ------------------------------"); + + } + + /** + * Analyzes an image page like + * https://www.deviantart.com/kageuri/art/RUBY-568396655 . + * + * Looks for download button, follows the authentications and redirects and adds + * the Image URL to the download queue. If no download button is present it will + * use the largest version of the image. + * + * Should work with all filetypes on Deviantart. Tested with .JPG .PNG and .PDF + * + * @author MrPlaygon + * + */ + private class DeviantartImageThread extends Thread { + private URL url; + + public DeviantartImageThread(URL url) { + this.url = url; + } + + @Override + public void run() { + getFullSizeURL(); + } + + /** + * Get URL to Artwork and return fullsize URL with file ending. 
+ * + * @param page Like + * https://www.deviantart.com/apofiss/art/warmest-of-the-days-455668450 + * @return URL like + * https://images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com/intermediary/f/07f7a6bb-2d35-4630-93fc-be249af22b3e/d7jak0y-d20e5932-df72-4d13-b002-5e122037b373.jpg + * + * + */ + private void getFullSizeURL() { + + System.out.println("------------------------------------------------------------"); + System.out.println("------------------------------Searching max. Resolution for " + url + + "------------------------------"); + sendUpdate(STATUS.LOADING_RESOURCE, "Searching max. resolution for " + url); + try { + Response re = Http.url(url).connection().referrer("https://www.deviantart.com/") + .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0") + .cookies(getDACookie()).execute(); + Document doc = re.parse(); + + // Artwork Title + String title = doc.select("a.title").first().html(); + title = title.replaceAll("[^a-zA-Z0-9\\.\\-]", "_").toLowerCase(); + + int counter = 1; + if (names.contains(title)) { + while (names.contains(title + "_" + counter)) { + counter++; + } + title = title + "_" + counter; + } + names.add(title); + + // Check for download button + Element downloadButton = null; + + downloadButton = doc.select("a.dev-page-download").first(); + + // Download Button + if (downloadButton != null) { + System.out.println("------------------------------Download Button found: " + + downloadButton.attr("href") + "------------------------------"); + + Response download = Http.url(downloadButton.attr("href")).connection().cookies(getDACookie()) + .method(Method.GET).referrer("https://www.deviantart.com/") + .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0") + .ignoreContentType(true).followRedirects(true).execute(); + URL location = download.url(); + + String[] filetypePart = download.header("Content-Disposition").split("\\."); + + 
System.out.println("------------------------------Found Image URL------------------------------"); + System.out.println("------------------------------" + url + "------------------------------"); + System.out.println("------------------------------" + location + "------------------------------"); + + addURLToDownload(location, "", "", "", new HashMap(), + title + "." + filetypePart[filetypePart.length - 1]); + return; + } + + // No Download Button + Element div = doc.select("div.dev-view-deviation").first(); + + Element image = div.getElementsByTag("img").first(); + + String source = ""; + if (image == null) { + System.out.println( + "------------------------------!!!ERROR on " + url + " !!!------------------------------"); + + System.out.println("------------------------------!!!Cookies: " + getDACookie() + + " ------------------------------"); + System.out.println(div); + sendUpdate(STATUS.DOWNLOAD_ERRORED, "!!!ERROR!!!\n" + url); + return; + } + + // When it is text art (e.g. story) the only image is the avator (profile + // picture) + if (image.hasClass("avatar")) { + System.out.println( + "------------------------------No Image found, probably text art------------------------------"); + System.out.println(url); + return; + } + + source = image.attr("src"); + + String[] parts = source.split("/v1/"); + + // Image page uses scaled down version. Split at /v1/ to receive max size. 
+ if (parts.length > 2) { + System.out.println( + "------------------------------Unexpected URL Format------------------------------"); + sendUpdate(STATUS.DOWNLOAD_WARN, "Unexpected URL Format - Risky Try"); + return; + } + + String[] tmpParts = parts[0].split("\\."); + + System.out.println("------------------------------Found Image URL------------------------------"); + System.out.println("------------------------------" + url + "------------------------------"); + System.out.println("------------------------------" + parts[0] + "------------------------------"); + + addURLToDownload(new URL(parts[0]), "", "", "", new HashMap(), + title + "." + tmpParts[tmpParts.length - 1]); + + } catch (IOException e) { + e.printStackTrace(); + } + + System.out.println( + "------------------------------No Full Size URL for: " + url + "------------------------------"); + sendUpdate(STATUS.DOWNLOAD_ERRORED, "No image found for " + url); + + return; + + } + } } \ No newline at end of file diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/DeviantartRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/DeviantartRipperTest.java index 6a1a054b..3fe1a719 100644 --- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/DeviantartRipperTest.java +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/DeviantartRipperTest.java @@ -9,6 +9,7 @@ import com.rarchives.ripme.ripper.rippers.DeviantartRipper; import com.rarchives.ripme.utils.Http; import org.jsoup.nodes.Document; +//TODO build some tests public class DeviantartRipperTest extends RippersTest { public void testDeviantartAlbum() throws IOException { DeviantartRipper ripper = new DeviantartRipper(new URL("https://www.deviantart.com/airgee/gallery/")); @@ -31,8 +32,8 @@ public class DeviantartRipperTest extends RippersTest { URL url = new URL("https://www.deviantart.com/airgee/gallery/"); DeviantartRipper ripper = new DeviantartRipper(url); Document doc = Http.url(url).get(); - 
assertEquals("airgee", ripper.getUsername(doc)); - assertEquals("714589", ripper.getGalleryID(doc)); + //assertEquals("airgee", ripper.getUsername(doc)); + //assertEquals("714589", ripper.getGalleryID(doc)); } public void testSanitizeURL() throws IOException { From 9eac66ef103bc7e033711d65e4712034f6177e3a Mon Sep 17 00:00:00 2001 From: MrPlaygon <-> Date: Sat, 16 Feb 2019 18:55:52 +0100 Subject: [PATCH 2/9] Small fix --- .../rarchives/ripme/ripper/rippers/DeviantartRipper.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java index a81c0656..1f22f625 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java @@ -32,8 +32,6 @@ import org.jsoup.select.Elements; * * NOT using Deviantart API like the old JSON ripper because it is SLOW * and somehow annoying to use. - * - * * Things to consider: Using the API might be less work/maintenance later because APIs do not change as frequently as HTML source code...? 
* * @@ -308,7 +306,7 @@ public class DeviantartRipper extends AbstractHTMLRipper { artist = m.group(1); } else { throw new MalformedURLException("Expected deviantart.com URL format: " - + "www.deviantart.com//gallery//\nOR\nwww.deviantart.com//favourites// - got " + + "www.deviantart.com//gallery//\nOR\nwww.deviantart.com//favourites//\\nOr simply the gallery or favorites of some artist - got " + url + " instead"); } @@ -319,7 +317,7 @@ public class DeviantartRipper extends AbstractHTMLRipper { what = "favourites"; } else { throw new MalformedURLException("Expected deviantart.com URL format: " - + "www.deviantart.com//gallery//\nOR\nwww.deviantart.com//favourites// - got " + + "www.deviantart.com//gallery//\nOR\nwww.deviantart.com//favourites//\nOr simply the gallery or favorites of some artist - got " + url + " instead"); } @@ -524,6 +522,7 @@ public class DeviantartRipper extends AbstractHTMLRipper { addURLToDownload(new URL(parts[0]), "", "", "", new HashMap(), title + "." + tmpParts[tmpParts.length - 1]); + return; } catch (IOException e) { e.printStackTrace(); From 48474ad7e78d8b2ad13a7f73b9ba7cdbb126cbaa Mon Sep 17 00:00:00 2001 From: MrPlaygon <-> Date: Sat, 16 Feb 2019 19:08:21 +0100 Subject: [PATCH 3/9] changed comment because of code factor check --- .../ripme/tst/ripper/rippers/DeviantartRipperTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/DeviantartRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/DeviantartRipperTest.java index 3fe1a719..553d499b 100644 --- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/DeviantartRipperTest.java +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/DeviantartRipperTest.java @@ -9,7 +9,6 @@ import com.rarchives.ripme.ripper.rippers.DeviantartRipper; import com.rarchives.ripme.utils.Http; import org.jsoup.nodes.Document; -//TODO build some tests public class DeviantartRipperTest extends RippersTest { public 
void testDeviantartAlbum() throws IOException { DeviantartRipper ripper = new DeviantartRipper(new URL("https://www.deviantart.com/airgee/gallery/")); @@ -32,6 +31,7 @@ public class DeviantartRipperTest extends RippersTest { URL url = new URL("https://www.deviantart.com/airgee/gallery/"); DeviantartRipper ripper = new DeviantartRipper(url); Document doc = Http.url(url).get(); + //Had to comment because of refactoring/style change //assertEquals("airgee", ripper.getUsername(doc)); //assertEquals("714589", ripper.getGalleryID(doc)); } From 82e98abf7fc879208f345b4b47eaf0348b5037ea Mon Sep 17 00:00:00 2001 From: MrPlaygon <-> Date: Sun, 17 Feb 2019 10:39:14 +0100 Subject: [PATCH 4/9] Using constants for useragent and referer now. --- .../ripper/rippers/DeviantartRipper.java | 48 ++++++++----------- 1 file changed, 20 insertions(+), 28 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java index 1f22f625..a4d76f54 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java @@ -31,8 +31,9 @@ import org.jsoup.select.Elements; * @author MrPlaygon * * NOT using Deviantart API like the old JSON ripper because it is SLOW - * and somehow annoying to use. - * Things to consider: Using the API might be less work/maintenance later because APIs do not change as frequently as HTML source code...? + * and somehow annoying to use. Things to consider: Using the API might + * be less work/maintenance later because APIs do not change as + * frequently as HTML source code...? 
* * * @@ -75,6 +76,10 @@ public class DeviantartRipper extends AbstractHTMLRipper { private DownloadThreadPool deviantartThreadPool = new DownloadThreadPool("deviantart"); private ArrayList names = new ArrayList(); + // Constants + private final String referer = "https://www.deviantart.com/"; + private final String userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0"; + @Override public DownloadThreadPool getThreadPool() { return deviantartThreadPool; @@ -97,8 +102,7 @@ public class DeviantartRipper extends AbstractHTMLRipper { @Override protected Document getFirstPage() throws IOException { login(); - return Http.url(urlWithParams(this.offset)).cookies(getDACookie()).referrer("https://www.deviantart.com/") - .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0").get(); + return Http.url(urlWithParams(this.offset)).cookies(getDACookie()).referrer(referer).userAgent(userAgent).get(); } /** @@ -119,9 +123,7 @@ public class DeviantartRipper extends AbstractHTMLRipper { // Load login page Response res = Http.url("https://www.deviantart.com/users/login").connection().method(Method.GET) - .referrer("https://www.deviantart.com/") - .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0") - .execute(); + .referrer(referer).userAgent(userAgent).execute(); // Find tokens Document doc = res.parse(); @@ -142,14 +144,11 @@ public class DeviantartRipper extends AbstractHTMLRipper { Map cookies = res.cookies(); // Log in using data. 
Handle redirect - res = Http.url("https://www.deviantart.com/users/login").connection() - .referrer("https://www.deviantart.com/") - .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0") + res = Http.url("https://www.deviantart.com/users/login").connection().referrer(referer).userAgent(userAgent) .method(Method.POST).data(loginData).cookies(cookies).followRedirects(false).execute(); this.cookies = res.cookies(); - res = Http.url(res.header("location")).connection().referrer("https://www.deviantart.com/") - .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0") + res = Http.url(res.header("location")).connection().referrer(referer).userAgent(userAgent) .method(Method.GET).cookies(cookies).followRedirects(false).execute(); // Store cookies @@ -195,9 +194,8 @@ public class DeviantartRipper extends AbstractHTMLRipper { @Override public Document getNextPage(Document doc) throws IOException { this.offset += 24; - Response re = Http.url(urlWithParams(this.offset)).cookies(getDACookie()) - .referrer("https://www.deviantart.com/") - .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0").response(); + Response re = Http.url(urlWithParams(this.offset)).cookies(getDACookie()).referrer(referer).userAgent(userAgent) + .response(); updateCookie(re.cookies()); Document docu = re.parse(); Elements messages = docu.getElementsByClass("message"); @@ -212,9 +210,7 @@ public class DeviantartRipper extends AbstractHTMLRipper { throw new IOException("No more pages"); } - return Http.url(urlWithParams(this.offset)).referrer("https://www.deviantart.com/") - .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0") - .cookies(getDACookie()).get(); + return Http.url(urlWithParams(this.offset)).referrer(referer).userAgent(userAgent).cookies(getDACookie()).get(); } @@ -260,10 +256,8 @@ public class DeviantartRipper extends AbstractHTMLRipper { 
System.out.println( "------------------------------DAURL: " + url.toExternalForm() + "------------------------------"); try { - Response re = Http.url(urlWithParams(this.offset)).cookies(getDACookie()) - .referrer("https://www.deviantart.com/") - .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0") - .response(); + Response re = Http.url(urlWithParams(this.offset)).cookies(getDACookie()).referrer(referer) + .userAgent(userAgent).response(); updateCookie(re.cookies()); } catch (IOException e) { e.printStackTrace(); @@ -431,9 +425,8 @@ public class DeviantartRipper extends AbstractHTMLRipper { + "------------------------------"); sendUpdate(STATUS.LOADING_RESOURCE, "Searching max. resolution for " + url); try { - Response re = Http.url(url).connection().referrer("https://www.deviantart.com/") - .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0") - .cookies(getDACookie()).execute(); + Response re = Http.url(url).connection().referrer(referer).userAgent(userAgent).cookies(getDACookie()) + .execute(); Document doc = re.parse(); // Artwork Title @@ -460,9 +453,8 @@ public class DeviantartRipper extends AbstractHTMLRipper { + downloadButton.attr("href") + "------------------------------"); Response download = Http.url(downloadButton.attr("href")).connection().cookies(getDACookie()) - .method(Method.GET).referrer("https://www.deviantart.com/") - .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0") - .ignoreContentType(true).followRedirects(true).execute(); + .method(Method.GET).referrer(referer).userAgent(userAgent).ignoreContentType(true) + .followRedirects(true).execute(); URL location = download.url(); String[] filetypePart = download.header("Content-Disposition").split("\\."); From da47ca0c9d9df3252aaf7ae89bfdf188652c0e97 Mon Sep 17 00:00:00 2001 From: MrPlaygon <-> Date: Thu, 28 Feb 2019 19:45:30 +0100 Subject: [PATCH 5/9] Logger changes and using 
config now to store login cookies. --- .../ripper/rippers/DeviantartRipper.java | 164 +++++++++--------- 1 file changed, 83 insertions(+), 81 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java index a4d76f54..7045ac5c 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java @@ -4,16 +4,18 @@ import com.rarchives.ripme.ripper.AbstractHTMLRipper; import com.rarchives.ripme.ripper.DownloadThreadPool; import com.rarchives.ripme.ui.RipStatusMessage.STATUS; import com.rarchives.ripme.utils.Http; +import com.rarchives.ripme.utils.Utils; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.ObjectInputStream; import java.io.ObjectOutputStream; +import java.io.Serializable; import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; +import java.util.Base64; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -72,13 +74,14 @@ public class DeviantartRipper extends AbstractHTMLRipper { private int offset = 0; private boolean usingCatPath = false; private int downloadCount = 0; - private Map cookies; + private Map cookies = null; private DownloadThreadPool deviantartThreadPool = new DownloadThreadPool("deviantart"); private ArrayList names = new ArrayList(); // Constants private final String referer = "https://www.deviantart.com/"; private final String userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0"; + private final String utilsKey = "DeviantartLogin.cookies"; @Override public DownloadThreadPool getThreadPool() { @@ -116,10 +119,15 @@ public class DeviantartRipper extends AbstractHTMLRipper { */ private void 
login() throws IOException { - File f = new File("DACookie.toDelete"); - if (!f.exists()) { - f.createNewFile(); - f.deleteOnExit(); + try { + String dACookies = Utils.getConfigString(utilsKey, null); + this.cookies = dACookies != null ? deserialize(dACookies) : null; + } catch (ClassNotFoundException e) { + e.printStackTrace(); + } + if (this.cookies == null) { + LOGGER.info("Log in now"); + // Do login now // Load login page Response res = Http.url("https://www.deviantart.com/users/login").connection().method(Method.GET) @@ -130,8 +138,7 @@ public class DeviantartRipper extends AbstractHTMLRipper { Element form = doc.getElementById("login"); String token = form.select("input[name=\"validate_token\"]").first().attr("value"); String key = form.select("input[name=\"validate_key\"]").first().attr("value"); - System.out.println( - "------------------------------" + token + " & " + key + "------------------------------"); + LOGGER.info("Token: " + token + " & Key: " + key); // Build Login Data HashMap loginData = new HashMap(); @@ -156,36 +163,13 @@ public class DeviantartRipper extends AbstractHTMLRipper { // Apply agegate this.cookies.put("agegate_state", "1"); + // Write Cookie to file for other RipMe Instances or later use + Utils.setConfigString(utilsKey, serialize(new HashMap(this.cookies))); + Utils.saveConfig(); // save now because of other instances that might work simultaneously - // Write Cookie to file for other RipMe Instances - try { - FileOutputStream fileOut = new FileOutputStream(f); - ObjectOutputStream out = new ObjectOutputStream(fileOut); - out.writeObject(this.cookies); - out.close(); - fileOut.close(); - } catch (IOException i) { - i.printStackTrace(); - } - - } else { - - // When cookie file already exists (from another RipMe instance) - while (this.cookies == null) { - try { - Thread.sleep(2000); - FileInputStream fileIn = new FileInputStream(f); - ObjectInputStream in = new ObjectInputStream(fileIn); - this.cookies = (Map) in.readObject(); - 
in.close(); - fileIn.close(); - } catch (IOException | ClassNotFoundException | InterruptedException i) { - i.printStackTrace(); - } - } } - System.out.println("------------------------------" + this.cookies + "------------------------------"); + LOGGER.info("DA Cookies: " + this.cookies); } /** @@ -199,14 +183,12 @@ public class DeviantartRipper extends AbstractHTMLRipper { updateCookie(re.cookies()); Document docu = re.parse(); Elements messages = docu.getElementsByClass("message"); - System.out.println("------------------------------Current Offset: " + this.offset - + " - More Pages?------------------------------"); + LOGGER.info("Current Offset: " + this.offset); if (messages.size() > 0) { // if message exists -> last page - System.out.println("------------------------------Messages amount: " + messages.size() - + " - Next Page does not exists------------------------------"); + LOGGER.info("Messages amount: " + messages.size() + " - Next Page does not exists"); throw new IOException("No more pages"); } @@ -238,9 +220,8 @@ public class DeviantartRipper extends AbstractHTMLRipper { } - System.out.println("------------------------------Amount of Images on Page: " + result.size() - + "------------------------------"); - System.out.println("------------------------------" + page.location() + "------------------------------"); + LOGGER.info("Amount of Images on Page: " + result.size()); + LOGGER.info(page.location()); return result; } @@ -251,10 +232,8 @@ public class DeviantartRipper extends AbstractHTMLRipper { @Override protected void downloadURL(URL url, int index) { this.downloadCount += 1; - System.out.println("------------------------------Download URL Number " + this.downloadCount - + "------------------------------"); - System.out.println( - "------------------------------DAURL: " + url.toExternalForm() + "------------------------------"); + LOGGER.info("Downloading URL Number " + this.downloadCount); + LOGGER.info("Deviant Art URL: " + 
url.toExternalForm()); try { Response re = Http.url(urlWithParams(this.offset)).cookies(getDACookie()).referrer(referer) .userAgent(userAgent).response(); @@ -326,8 +305,7 @@ public class DeviantartRipper extends AbstractHTMLRipper { } else if (artistM.matches()) { albumname = artistM.group(1); } - System.out.println("------------------------------Album Name: " + artist + "_" + what + "_" + albumname - + "------------------------------"); + LOGGER.info("Album Name: " + artist + "_" + what + "_" + albumname); return artist + "_" + what + "_" + albumname; @@ -372,17 +350,49 @@ public class DeviantartRipper extends AbstractHTMLRipper { private void updateCookie(Map m) { - System.out.println("------------------------------Updating Cookies------------------------------"); - System.out.println( - "------------------------------Old Cookies: " + this.cookies + " ------------------------------"); - System.out.println("------------------------------New Cookies: " + m + " ------------------------------"); + LOGGER.info("Updating Cookies"); + LOGGER.info("Old Cookies: " + this.cookies + " "); + LOGGER.info("New Cookies: " + m + " "); this.cookies.putAll(m); this.cookies.put("agegate_state", "1"); - System.out.println( - "------------------------------Merged Cookies: " + this.cookies + " ------------------------------"); + LOGGER.info("Merged Cookies: " + this.cookies + " "); } + /** + * Serializes an Object and returns a String ready to store Used to store + * cookies in the config file because the deviantart cookies contain all sort of + * special characters like ; , = : and so on. 
+ * + * @param o Object to serialize + * @return The serialized base64 encoded object + * @throws IOException + */ + private String serialize(Serializable o) throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + ObjectOutputStream oos = new ObjectOutputStream(baos); + oos.writeObject(o); + oos.close(); + return Base64.getEncoder().encodeToString(baos.toByteArray()); + } + + /** + * Recreates the object from the base64 encoded String. Used for Cookies + * + * @param s the base64 encoded string + * @return the Cookie Map + * @throws IOException + * @throws ClassNotFoundException + */ + private Map deserialize(String s) throws IOException, ClassNotFoundException { + byte[] data = Base64.getDecoder().decode(s); + ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(data)); + HashMap o = (HashMap) ois.readObject(); // Unchecked cast here but should never + // be something else + ois.close(); + return o; + } + /** * Analyzes an image page like * https://www.deviantart.com/kageuri/art/RUBY-568396655 . @@ -420,9 +430,7 @@ public class DeviantartRipper extends AbstractHTMLRipper { */ private void getFullSizeURL() { - System.out.println("------------------------------------------------------------"); - System.out.println("------------------------------Searching max. Resolution for " + url - + "------------------------------"); + LOGGER.info("Searching max. Resolution for " + url); sendUpdate(STATUS.LOADING_RESOURCE, "Searching max. 
resolution for " + url); try { Response re = Http.url(url).connection().referrer(referer).userAgent(userAgent).cookies(getDACookie()) @@ -449,8 +457,7 @@ public class DeviantartRipper extends AbstractHTMLRipper { // Download Button if (downloadButton != null) { - System.out.println("------------------------------Download Button found: " - + downloadButton.attr("href") + "------------------------------"); + LOGGER.info("Download Button found: " + downloadButton.attr("href")); Response download = Http.url(downloadButton.attr("href")).connection().cookies(getDACookie()) .method(Method.GET).referrer(referer).userAgent(userAgent).ignoreContentType(true) @@ -459,9 +466,9 @@ public class DeviantartRipper extends AbstractHTMLRipper { String[] filetypePart = download.header("Content-Disposition").split("\\."); - System.out.println("------------------------------Found Image URL------------------------------"); - System.out.println("------------------------------" + url + "------------------------------"); - System.out.println("------------------------------" + location + "------------------------------"); + LOGGER.info("Found Image URL"); + LOGGER.info(url); + LOGGER.info(location); addURLToDownload(location, "", "", "", new HashMap(), title + "." + filetypePart[filetypePart.length - 1]); @@ -475,22 +482,19 @@ public class DeviantartRipper extends AbstractHTMLRipper { String source = ""; if (image == null) { - System.out.println( - "------------------------------!!!ERROR on " + url + " !!!------------------------------"); + LOGGER.error("ERROR on " + url); - System.out.println("------------------------------!!!Cookies: " + getDACookie() - + " ------------------------------"); - System.out.println(div); - sendUpdate(STATUS.DOWNLOAD_ERRORED, "!!!ERROR!!!\n" + url); + LOGGER.error("Cookies: " + getDACookie() + " "); + LOGGER.error(div); + sendUpdate(STATUS.DOWNLOAD_ERRORED, "ERROR at\n" + url); return; } // When it is text art (e.g. 
story) the only image is the avator (profile // picture) if (image.hasClass("avatar")) { - System.out.println( - "------------------------------No Image found, probably text art------------------------------"); - System.out.println(url); + LOGGER.error("No Image found, probably text art"); + LOGGER.error(url); return; } @@ -500,17 +504,16 @@ public class DeviantartRipper extends AbstractHTMLRipper { // Image page uses scaled down version. Split at /v1/ to receive max size. if (parts.length > 2) { - System.out.println( - "------------------------------Unexpected URL Format------------------------------"); - sendUpdate(STATUS.DOWNLOAD_WARN, "Unexpected URL Format - Risky Try"); + LOGGER.error("Unexpected URL Format"); + sendUpdate(STATUS.DOWNLOAD_ERRORED, "Unexpected URL Format"); return; } String[] tmpParts = parts[0].split("\\."); - System.out.println("------------------------------Found Image URL------------------------------"); - System.out.println("------------------------------" + url + "------------------------------"); - System.out.println("------------------------------" + parts[0] + "------------------------------"); + LOGGER.info("Found Image URL"); + LOGGER.info(url); + LOGGER.info(parts[0]); addURLToDownload(new URL(parts[0]), "", "", "", new HashMap(), title + "." 
+ tmpParts[tmpParts.length - 1]); @@ -520,8 +523,7 @@ public class DeviantartRipper extends AbstractHTMLRipper { e.printStackTrace(); } - System.out.println( - "------------------------------No Full Size URL for: " + url + "------------------------------"); + LOGGER.error("No Full Size URL for: " + url); sendUpdate(STATUS.DOWNLOAD_ERRORED, "No image found for " + url); return; From 7e7421d8bc39fdc5ea4b4f209ae41b410b53e27e Mon Sep 17 00:00:00 2001 From: MrPlaygon <-> Date: Fri, 1 Mar 2019 18:15:42 +0100 Subject: [PATCH 6/9] Check for valid login to reduce ban rate Check for deactivated Account before trying to rip album --- .../ripper/rippers/DeviantartRipper.java | 134 +++++++++++++++--- 1 file changed, 116 insertions(+), 18 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java index 7045ac5c..a6b0c295 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java @@ -15,13 +15,16 @@ import java.io.Serializable; import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; +import java.util.Arrays; import java.util.Base64; import java.util.HashMap; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.jsoup.Connection; import org.jsoup.Connection.Method; import org.jsoup.Connection.Response; import org.jsoup.nodes.Document; @@ -35,7 +38,7 @@ import org.jsoup.select.Elements; * NOT using Deviantart API like the old JSON ripper because it is SLOW * and somehow annoying to use. Things to consider: Using the API might * be less work/maintenance later because APIs do not change as - * frequently as HTML source code...? + * frequently as HTML source code does...? 
* * * @@ -55,6 +58,16 @@ import org.jsoup.select.Elements; * https://www.deviantart.com/revpeng/gallery/67734353/Siren-Lee-Agent-of-S-I-R-E-N-S * * + * Deactivated account: + * + * https://www.deviantart.com/gingerbreadpony + * + * Banned Account: + * + * https://www.deviantart.com/ghostofflossenburg + * + * + * * * Login Data (PLEASE DONT ACTUALLY USE!!!): * @@ -74,10 +87,14 @@ public class DeviantartRipper extends AbstractHTMLRipper { private int offset = 0; private boolean usingCatPath = false; private int downloadCount = 0; - private Map cookies = null; + private Map cookies = new HashMap(); private DownloadThreadPool deviantartThreadPool = new DownloadThreadPool("deviantart"); private ArrayList names = new ArrayList(); + List allowedCookies = Arrays.asList("agegate_state", "userinfo", "auth", "auth_secure"); + + private Connection conn = null; + // Constants private final String referer = "https://www.deviantart.com/"; private final String userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:65.0) Gecko/20100101 Firefox/65.0"; @@ -104,8 +121,29 @@ public class DeviantartRipper extends AbstractHTMLRipper { @Override protected Document getFirstPage() throws IOException { + if (isDeactivated()) { + throw new IOException("Account Deactivated"); + } login(); - return Http.url(urlWithParams(this.offset)).cookies(getDACookie()).referrer(referer).userAgent(userAgent).get(); + + // Saving connection to reuse later for following pages. 
+ this.conn = Http.url(urlWithParams(this.offset)).cookies(getDACookie()).referrer(this.referer) + .userAgent(this.userAgent).connection(); + + return this.conn.get(); + } + + /** + * Checks if the URL refers to a deactivated account using the HTTP status code + * + * @return true when the account is not reachable (HTTP status is not 200) + * @throws IOException when the account is deactivated + */ + private boolean isDeactivated() throws IOException { + Response res = Http.url(this.url).connection().followRedirects(true).referrer(this.referer) + .userAgent(this.userAgent).execute(); + return res.statusCode() != 200 ? true : false; + } /** @@ -121,18 +159,20 @@ public class DeviantartRipper extends AbstractHTMLRipper { try { String dACookies = Utils.getConfigString(utilsKey, null); - this.cookies = dACookies != null ? deserialize(dACookies) : null; + updateCookie(dACookies != null ? deserialize(dACookies) : null); } catch (ClassNotFoundException e) { e.printStackTrace(); } - if (this.cookies == null) { - LOGGER.info("Log in now"); + if (getDACookie() == null || !checkLogin()) { + LOGGER.info("Do Login now"); // Do login now // Load login page Response res = Http.url("https://www.deviantart.com/users/login").connection().method(Method.GET) .referrer(referer).userAgent(userAgent).execute(); + updateCookie(res.cookies()); + // Find tokens Document doc = res.parse(); Element form = doc.getElementById("login"); @@ -143,8 +183,8 @@ public class DeviantartRipper extends AbstractHTMLRipper { // Build Login Data HashMap loginData = new HashMap(); loginData.put("challenge", ""); - loginData.put("username", username); - loginData.put("password", password); + loginData.put("username", this.username); + loginData.put("password", this.password); loginData.put("remember_me", "1"); loginData.put("validate_token", token); loginData.put("validate_key", key); @@ -153,7 +193,7 @@ public class DeviantartRipper extends AbstractHTMLRipper { // Log in using data.
Handle redirect res = Http.url("https://www.deviantart.com/users/login").connection().referrer(referer).userAgent(userAgent) .method(Method.POST).data(loginData).cookies(cookies).followRedirects(false).execute(); - this.cookies = res.cookies(); + updateCookie(res.cookies()); res = Http.url(res.header("location")).connection().referrer(referer).userAgent(userAgent) .method(Method.GET).cookies(cookies).followRedirects(false).execute(); @@ -161,15 +201,15 @@ public class DeviantartRipper extends AbstractHTMLRipper { // Store cookies updateCookie(res.cookies()); - // Apply agegate - this.cookies.put("agegate_state", "1"); // Write Cookie to file for other RipMe Instances or later use - Utils.setConfigString(utilsKey, serialize(new HashMap(this.cookies))); + Utils.setConfigString(utilsKey, serialize(new HashMap(getDACookie()))); Utils.saveConfig(); // save now because of other instances that might work simultaneously + }else { + LOGGER.info("No new Login needed"); } - LOGGER.info("DA Cookies: " + this.cookies); + LOGGER.info("DA Cookies: " + getDACookie()); } /** @@ -178,8 +218,10 @@ public class DeviantartRipper extends AbstractHTMLRipper { @Override public Document getNextPage(Document doc) throws IOException { this.offset += 24; - Response re = Http.url(urlWithParams(this.offset)).cookies(getDACookie()).referrer(referer).userAgent(userAgent) - .response(); + this.conn.url(urlWithParams(this.offset)).cookies(getDACookie()); + Response re = this.conn.execute(); +// Response re = Http.url(urlWithParams(this.offset)).cookies(getDACookie()).referrer(referer).userAgent(userAgent) +// .response(); updateCookie(re.cookies()); Document docu = re.parse(); Elements messages = docu.getElementsByClass("message"); @@ -348,15 +390,36 @@ public class DeviantartRipper extends AbstractHTMLRipper { return this.cookies; } + /** + * Updates cookies + * @param m new Cookies + */ private void updateCookie(Map m) { + Iterator iter = m.keySet().iterator(); + while (iter.hasNext()) { + String 
current = iter.next(); + if (!this.allowedCookies.contains(current)) { + //m.remove(current); + iter.remove(); + } + } + LOGGER.info("Updating Cookies"); - LOGGER.info("Old Cookies: " + this.cookies + " "); + LOGGER.info("Old Cookies: " + getDACookie() + " "); LOGGER.info("New Cookies: " + m + " "); this.cookies.putAll(m); this.cookies.put("agegate_state", "1"); - LOGGER.info("Merged Cookies: " + this.cookies + " "); + LOGGER.info("Merged Cookies: " + getDACookie() + " "); + try { + Utils.setConfigString(utilsKey, serialize(new HashMap(getDACookie()))); + Utils.saveConfig(); + } catch (IOException e) { + e.printStackTrace(); + } + + } /** @@ -379,7 +442,7 @@ public class DeviantartRipper extends AbstractHTMLRipper { /** * Recreates the object from the base64 encoded String. Used for Cookies * - * @param s the base64 encoded string + * @param s the Base64 encoded string * @return the Cookie Map * @throws IOException * @throws ClassNotFoundException @@ -393,6 +456,41 @@ public class DeviantartRipper extends AbstractHTMLRipper { return o; } + /** + * Checks if the current cookies are still valid/usable. Also checks if agegate + * is given. + * + * @return True when all is good. + */ + private boolean checkLogin() { + if (!getDACookie().containsKey("agegate_state")) { + LOGGER.info("No agegate key"); + return false; + } else if (!getDACookie().get("agegate_state").equals("1")) { + LOGGER.info("Wrong agegate value"); + return false; + } + + try { + LOGGER.info("Login with Cookies: " + getDACookie()); + Response res = Http.url("https://www.deviantart.com/users/login").connection().followRedirects(true) + .cookies(getDACookie()).referrer(this.referer).userAgent(this.userAgent).execute(); + if (!res.url().toExternalForm().equals("https://www.deviantart.com/users/login")) { + LOGGER.info("Cookies are valid"); + LOGGER.info(res.url()); + return true; + } else { + LOGGER.info("Cookies invalid. 
Wrong URL: " + res.url()); + LOGGER.info(res.statusCode()); + LOGGER.info(res.parse()); + return false; + } + } catch (IOException e) { + e.printStackTrace(); + return false; + } + } + /** * Analyzes an image page like * https://www.deviantart.com/kageuri/art/RUBY-568396655 . From 3781737786948b9ebfc94ed9fade618caa79a6b0 Mon Sep 17 00:00:00 2001 From: MrPlaygon <-> Date: Fri, 1 Mar 2019 18:26:16 +0100 Subject: [PATCH 7/9] Custom Login possible now --- .../rarchives/ripme/ripper/rippers/DeviantartRipper.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java index a6b0c295..6a171228 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java @@ -157,6 +157,8 @@ public class DeviantartRipper extends AbstractHTMLRipper { */ private void login() throws IOException { + String customUsername = Utils.getConfigString("DeviantartCustomLoginUsername", this.username); + String customPassword = Utils.getConfigString("DeviantartCustomLoginPassword", this.password); try { String dACookies = Utils.getConfigString(utilsKey, null); updateCookie(dACookies != null ? 
deserialize(dACookies) : null); @@ -183,8 +185,8 @@ public class DeviantartRipper extends AbstractHTMLRipper { // Build Login Data HashMap loginData = new HashMap(); loginData.put("challenge", ""); - loginData.put("username", this.username); - loginData.put("password", this.password); + loginData.put("username", customUsername); + loginData.put("password", customPassword); loginData.put("remember_me", "1"); loginData.put("validate_token", token); loginData.put("validate_key", key); From ed2082dfe8b2804ea0c6a99355e57fe031c5cc6c Mon Sep 17 00:00:00 2001 From: MrPlaygon <-> Date: Thu, 4 Apr 2019 20:12:41 +0200 Subject: [PATCH 8/9] Added check if login URL results in 404 for development purpose --- .../ripper/rippers/DeviantartRipper.java | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java index 6a171228..80b78f1a 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java @@ -207,7 +207,7 @@ public class DeviantartRipper extends AbstractHTMLRipper { Utils.setConfigString(utilsKey, serialize(new HashMap(getDACookie()))); Utils.saveConfig(); // save now because of other instances that might work simultaneously - }else { + } else { LOGGER.info("No new Login needed"); } @@ -394,6 +394,7 @@ public class DeviantartRipper extends AbstractHTMLRipper { /** * Updates cookies + * * @param m new Cookies */ private void updateCookie(Map m) { @@ -402,7 +403,7 @@ public class DeviantartRipper extends AbstractHTMLRipper { while (iter.hasNext()) { String current = iter.next(); if (!this.allowedCookies.contains(current)) { - //m.remove(current); + // m.remove(current); iter.remove(); } } @@ -420,8 +421,7 @@ public class DeviantartRipper extends AbstractHTMLRipper { } catch (IOException e) { e.printStackTrace(); 
} - - + } /** @@ -564,13 +564,14 @@ public class DeviantartRipper extends AbstractHTMLRipper { .followRedirects(true).execute(); URL location = download.url(); + System.out.println("----------------> " + url); String[] filetypePart = download.header("Content-Disposition").split("\\."); LOGGER.info("Found Image URL"); LOGGER.info(url); LOGGER.info(location); - addURLToDownload(location, "", "", "", new HashMap(), + addURLToDownload(location, "", "", "", getDACookie(), title + "." + filetypePart[filetypePart.length - 1]); return; } @@ -614,7 +615,14 @@ public class DeviantartRipper extends AbstractHTMLRipper { LOGGER.info("Found Image URL"); LOGGER.info(url); LOGGER.info(parts[0]); - + while (Http.url(parts[0]).connection().execute().statusCode() == 404) { + try { + LOGGER.error("404 on " + url); + Thread.sleep(1000); + } catch (Exception e) { + e.printStackTrace(); + } + } addURLToDownload(new URL(parts[0]), "", "", "", new HashMap(), title + "." + tmpParts[tmpParts.length - 1]); return; From 10663b0fede530d5f1fc0fc5a29d598fb181c1f7 Mon Sep 17 00:00:00 2001 From: MrPlaygon <-> Date: Sat, 6 Apr 2019 10:26:00 +0200 Subject: [PATCH 9/9] NullPointer Fix. Thanks to cyian-1756 --- .../com/rarchives/ripme/ripper/rippers/DeviantartRipper.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java index 80b78f1a..8a24e2c9 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java @@ -399,6 +399,10 @@ public class DeviantartRipper extends AbstractHTMLRipper { */ private void updateCookie(Map m) { + if (m == null) { + return; + } + Iterator iter = m.keySet().iterator(); while (iter.hasNext()) { String current = iter.next();