From 8f3100df0aaadf59fdd48f0a49b0dbbff70b98ed Mon Sep 17 00:00:00 2001 From: buzzlightmonth <44553885+buzzlightmonth@users.noreply.github.com> Date: Sat, 19 Jan 2019 19:35:43 +0100 Subject: [PATCH 1/2] Tumblr ripper downloads highest quality available --- .../ripme/ripper/rippers/TumblrRipper.java | 20 +++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/TumblrRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/TumblrRipper.java index f27dd2f5..68462d6f 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/TumblrRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/TumblrRipper.java @@ -236,8 +236,12 @@ public class TumblrRipper extends AlbumRipper { Matcher m; p = Pattern.compile(IMAGE_PATTERN); + String fileLocation; URL fileURL; + Pattern qualP = Pattern.compile("_[0-9]+\\.(jpg|png|gif|bmp)$"); + Matcher qualM; + if (albumType == ALBUM_TYPE.LIKED) { posts = json.getJSONObject("response").getJSONArray("liked_posts"); } else { @@ -256,7 +260,12 @@ public class TumblrRipper extends AlbumRipper { for (int j = 0; j < photos.length(); j++) { photo = photos.getJSONObject(j); try { - fileURL = new URL(photo.getJSONObject("original_size").getString("url").replaceAll("http:", "https:")); + fileLocation = photo.getJSONObject("original_size").getString("url").replaceAll("http:", "https:"); + qualM = qualP.matcher(fileLocation); + if (qualM.matches()) { + fileLocation = fileLocation.replaceFirst("_[0-9]+\\.(jpg|png|gif|bmp)$", "_1280." + qualM.group(1)); + } + fileURL = new URL(fileLocation); m = p.matcher(fileURL.toString()); if (m.matches()) { @@ -281,7 +290,14 @@ public class TumblrRipper extends AlbumRipper { Document d = Jsoup.parse(post.getString("body")); if (!d.select("img").attr("src").isEmpty()) { try { - downloadURL(new URL(d.select("img").attr("src")), date); + String imgSrc = d.select("img").attr("src"); + // Set maximum quality, tumblr doesn't go any further + // If the image is any smaller, it will still get the largest available size + qualM = qualP.matcher(imgSrc); + if (qualM.matches()) { + imgSrc = imgSrc.replaceFirst("_[0-9]+\\.(jpg|png|gif|bmp)$", "_1280." + qualM.group(1)); + } + downloadURL(new URL(imgSrc), date); } catch (MalformedURLException e) { LOGGER.error("[!] Error while getting embedded image at " + post, e); return true; From 197a7411361fde1e25089c2cce9af3bd9ca9940c Mon Sep 17 00:00:00 2001 From: buzzlightmonth <44553885+buzzlightmonth@users.noreply.github.com> Date: Sat, 19 Jan 2019 20:41:23 +0100 Subject: [PATCH 2/2] Added non-capturing groups and simplified replacements --- .../rarchives/ripme/ripper/rippers/TumblrRipper.java | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/TumblrRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/TumblrRipper.java index 68462d6f..0c561d77 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/TumblrRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/TumblrRipper.java @@ -29,7 +29,7 @@ public class TumblrRipper extends AlbumRipper { private static final String DOMAIN = "tumblr.com", HOST = "tumblr", - IMAGE_PATTERN = "([^\\s]+(\\.(?i)(jpg|png|gif|bmp))$)"; + IMAGE_PATTERN = "([^\\s]+(\\.(?i)(?:jpg|png|gif|bmp))$)"; private enum ALBUM_TYPE { SUBDOMAIN, @@ -262,9 +262,7 @@ public class TumblrRipper extends AlbumRipper { try { fileLocation = photo.getJSONObject("original_size").getString("url").replaceAll("http:", "https:"); qualM = qualP.matcher(fileLocation); - if (qualM.matches()) { - fileLocation = fileLocation.replaceFirst("_[0-9]+\\.(jpg|png|gif|bmp)$", "_1280." + qualM.group(1)); - } + fileLocation = qualM.replaceFirst("_1280.$1"); fileURL = new URL(fileLocation); m = p.matcher(fileURL.toString()); @@ -294,9 +292,7 @@ public class TumblrRipper extends AlbumRipper { // Set maximum quality, tumblr doesn't go any further // If the image is any smaller, it will still get the largest available size qualM = qualP.matcher(imgSrc); - if (qualM.matches()) { - imgSrc = imgSrc.replaceFirst("_[0-9]+\\.(jpg|png|gif|bmp)$", "_1280." + qualM.group(1)); - } + imgSrc = qualM.replaceFirst("_1280.$1"); downloadURL(new URL(imgSrc), date); } catch (MalformedURLException e) { LOGGER.error("[!] Error while getting embedded image at " + post, e);