From 0a27fc3089303f605e9b81789fdc6126bc5f9898 Mon Sep 17 00:00:00 2001
From: cyian-1756
Date: Wed, 14 Nov 2018 21:05:37 -0500
Subject: [PATCH 1/7] Flickr ripper can now rip single pages from photo sets

---
 .../ripme/ripper/rippers/FlickrRipper.java | 311 +++++++++---------
 1 file changed, 147 insertions(+), 164 deletions(-)

diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java
index 10e786d3..2171e9dd 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java
@@ -3,27 +3,19 @@ package com.rarchives.ripme.ripper.rippers;
 import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URL;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
+import java.util.*;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
-import org.jsoup.Connection.Method;
-import org.jsoup.Connection.Response;
-import org.jsoup.Jsoup;
+import com.rarchives.ripme.ui.RipStatusMessage;
+import org.json.JSONArray;
+import org.json.JSONObject;
 import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
-import org.jsoup.select.Elements;
 
 import com.rarchives.ripme.ripper.AbstractHTMLRipper;
 import com.rarchives.ripme.ripper.DownloadThreadPool;
-import com.rarchives.ripme.utils.Base64;
 import com.rarchives.ripme.utils.Http;
-import com.rarchives.ripme.utils.Utils;
+import org.jsoup.nodes.Element;
 
 public class FlickrRipper extends AbstractHTMLRipper {
 
@@ -36,6 +28,11 @@ public class FlickrRipper extends AbstractHTMLRipper {
         return flickrThreadPool;
     }
 
+    @Override
+    public boolean hasASAPRipping() {
+        return true;
+    }
+
     public FlickrRipper(URL url) throws IOException {
         super(url);
         flickrThreadPool = new DownloadThreadPool();
@@ -50,6 +47,7 @@ public class FlickrRipper extends AbstractHTMLRipper {
         return "flickr.com";
     }
 
+    @Override
     public URL sanitizeURL(URL url) throws MalformedURLException {
         String sUrl = url.toExternalForm();
         // Strip out https
@@ -64,6 +62,84 @@ public class FlickrRipper extends AbstractHTMLRipper {
         return new URL(sUrl);
     }
 
+    private String getAPIKey(Document doc) {
+        Pattern p; Matcher m;
+        p = Pattern.compile("root.YUI_config.flickr.api.site_key = \"([a-zA-Z0-9]*)\";");
+        for (Element e : doc.select("script")) {
+            // You have to use .html here as .text will strip most of the javascript
+            m = p.matcher(e.html());
+            if (m.find()) {
+                LOGGER.info("Found api key: " + m.group(1));
+                return m.group(1);
+            }
+        }
+        LOGGER.error("Unable to get api key");
+        // A nice error message to tell our users what went wrong
+        sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_WARN, "Unable to extract api key from flickr");
+        sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_WARN, "Using hardcoded api key");
+        return "935649baf09b2cc50628e2b306e4da5d";
+    }
+
+    // The flickr api is a monster of weird settings, so we just request everything that the webview does
+    private String apiURLBuilder(String photoset, String pageNumber, String apiKey) {
+        LOGGER.info("https://api.flickr.com/services/rest?extras=can_addmeta," +
+                "can_comment,can_download,can_share,contact,count_comments,count_faves,count_views,date_taken," +
+                "date_upload,icon_urls_deep,isfavorite,ispro,license,media,needs_interstitial,owner_name," +
+                "owner_datecreate,path_alias,realname,rotation,safety_level,secret_k,secret_h,url_c,url_f,url_h,url_k," +
"url_l,url_m,url_n,url_o,url_q,url_s,url_sq,url_t,url_z,visibility,visibility_source,o_dims," + + "is_marketplace_printable,is_marketplace_licensable,publiceditability&per_page=100&page="+ pageNumber + "&" + + "get_user_info=1&primary_photo_extras=url_c,%20url_h,%20url_k,%20url_l,%20url_m,%20url_n,%20url_o" + + ",%20url_q,%20url_s,%20url_sq,%20url_t,%20url_z,%20needs_interstitial,%20can_share&jump_to=&" + + "photoset_id=" + photoset + "&viewerNSID=&method=flickr.photosets.getPhotos&csrf=&" + + "api_key=" + apiKey + "&format=json&hermes=1&hermesClient=1&reqId=358ed6a0&nojsoncallback=1"); + return "https://api.flickr.com/services/rest?extras=can_addmeta," + + "can_comment,can_download,can_share,contact,count_comments,count_faves,count_views,date_taken," + + "date_upload,icon_urls_deep,isfavorite,ispro,license,media,needs_interstitial,owner_name," + + "owner_datecreate,path_alias,realname,rotation,safety_level,secret_k,secret_h,url_c,url_f,url_h,url_k," + + "url_l,url_m,url_n,url_o,url_q,url_s,url_sq,url_t,url_z,visibility,visibility_source,o_dims," + + "is_marketplace_printable,is_marketplace_licensable,publiceditability&per_page=100&page="+ pageNumber + "&" + + "get_user_info=1&primary_photo_extras=url_c,%20url_h,%20url_k,%20url_l,%20url_m,%20url_n,%20url_o" + + ",%20url_q,%20url_s,%20url_sq,%20url_t,%20url_z,%20needs_interstitial,%20can_share&jump_to=&" + + "photoset_id=" + photoset + "&viewerNSID=&method=flickr.photosets.getPhotos&csrf=&" + + "api_key=" + apiKey + "&format=json&hermes=1&hermesClient=1&reqId=358ed6a0&nojsoncallback=1"; + } + + private JSONObject getJSON(String page, String apiKey) { + URL pageURL = null; + String apiURL = null; + try { + apiURL = apiURLBuilder(getPhotosetID(url.toExternalForm()), page, apiKey); + pageURL = new URL(apiURL); + } catch (MalformedURLException e) { + LOGGER.error("Unable to get api link " + apiURL + " is malformed"); + } + try { + LOGGER.info(Http.url(pageURL).ignoreContentType().get().text()); + return new JSONObject(Http.url(pageURL).ignoreContentType().get().text()); + } catch (IOException e) { + LOGGER.error("Unable to get api link " + apiURL + " is malformed"); + return null; + } + } + + private String getPhotosetID(String url) { + Pattern p; Matcher m; + + // Root: https://www.flickr.com/photos/115858035@N04/ + // Album: https://www.flickr.com/photos/115858035@N04/sets/72157644042355643/ + + final String domainRegex = "https?://[wm.]*flickr.com"; + final String userRegex = "[a-zA-Z0-9@]+"; + // Album + p = Pattern.compile("^" + domainRegex + "/photos/(" + userRegex + ")/(sets|albums)/([0-9]+)/?.*$"); + m = p.matcher(url); + if (m.matches()) { + return m.group(3); + } + return null; + } + + @Override public String getAlbumTitle(URL url) throws MalformedURLException { if (!url.toExternalForm().contains("/sets/")) { return super.getAlbumTitle(url); @@ -128,168 +204,75 @@ public class FlickrRipper extends AbstractHTMLRipper { return albumDoc; } - @Override - public Document getNextPage(Document doc) throws IOException { - if (isThisATest()) { - return null; - } - // Find how many pages there are - int lastPage = 0; - for (Element apage : doc.select("a[data-track^=page-]")) { - String lastPageStr = apage.attr("data-track").replace("page-", ""); - lastPage = Integer.parseInt(lastPageStr); - } - // If we're at the last page, stop. 
-        if (page >= lastPage) {
-            throw new IOException("No more pages");
-        }
-        // Load the next page
-        page++;
-        albumDoc = null;
-        String nextURL = this.url.toExternalForm();
-        if (!nextURL.endsWith("/")) {
-            nextURL += "/";
-        }
-        nextURL += "page" + page + "/";
-        // Wait a bit
-        try {
-            Thread.sleep(1000);
-        } catch (InterruptedException e) {
-            throw new IOException("Interrupted while waiting to load next page " + nextURL);
-        }
-        return Http.url(nextURL).get();
-    }
+//    @Override
+//    public Document getNextPage(Document doc) throws IOException {
+//        if (isThisATest()) {
+//            return null;
+//        }
+//        // Find how many pages there are
+//        int lastPage = 0;
+//        for (Element apage : doc.select("a[data-track^=page-]")) {
+//            String lastPageStr = apage.attr("data-track").replace("page-", "");
+//            lastPage = Integer.parseInt(lastPageStr);
+//        }
+//        // If we're at the last page, stop.
+//        if (page >= lastPage) {
+//            throw new IOException("No more pages");
+//        }
+//        // Load the next page
+//        page++;
+//        albumDoc = null;
+//        String nextURL = this.url.toExternalForm();
+//        if (!nextURL.endsWith("/")) {
+//            nextURL += "/";
+//        }
+//        nextURL += "page" + page + "/";
+//        // Wait a bit
+//        try {
+//            Thread.sleep(1000);
+//        } catch (InterruptedException e) {
+//            throw new IOException("Interrupted while waiting to load next page " + nextURL);
+//        }
+//        return Http.url(nextURL).get();
+//    }
 
     @Override
-    public List<String> getURLsFromPage(Document page) {
+    public List<String> getURLsFromPage(Document doc) {
         List<String> imageURLs = new ArrayList<>();
-        for (Element thumb : page.select("a[data-track=photo-click]")) {
-            /* TODO find a way to persist the image title
-            String imageTitle = null;
-            if (thumb.hasAttr("title")) {
-                imageTitle = thumb.attr("title");
-            }
-            */
-            String imagePage = thumb.attr("href");
-            if (imagePage.startsWith("/")) {
-                imagePage = "http://www.flickr.com" + imagePage;
-            }
-            if (imagePage.contains("/in/")) {
-                imagePage = imagePage.substring(0, imagePage.indexOf("/in/") + 1);
-            }
-            if (!imagePage.endsWith("/")) {
-                imagePage += "/";
-            }
-            imagePage += "sizes/o/";
-            // Check for duplicates
-            if (attempted.contains(imagePage)) {
-                continue;
-            }
-            attempted.add(imagePage);
-            imageURLs.add(imagePage);
-            if (isThisATest()) {
+        int x = 1;
+        while (true) {
+            JSONObject jsonData = getJSON(String.valueOf(x), getAPIKey(doc));
+            if (jsonData.has("stat") && jsonData.getString("stat").equals("fail")) {
+                break;
+            } else {
+                LOGGER.info(jsonData);
+                JSONArray pictures = jsonData.getJSONObject("photoset").getJSONArray("photo");
+                for (int i = 0; i < pictures.length(); i++) {
+                    LOGGER.info(i);
+                    JSONObject data = (JSONObject) pictures.get(i);
+                    // flickr has a real funny way of listing the image sizes, so we have to loop over all these until we
+                    // find one that works
+                    List<String> imageSizes = Arrays.asList("k", "h", "l", "n", "c", "z", "t");
+                    for (String imageSize : imageSizes) {
+                        try {
+                            addURLToDownload(new URL(data.getString("url_" + imageSize)));
+                            LOGGER.info("Adding picture " + data.getString("url_" + imageSize));
+                            break;
+                        } catch (org.json.JSONException ignore) {
+
+                        } catch (MalformedURLException e) {}
+                    }
+                }
                 break;
             }
         }
+
         return imageURLs;
     }
 
     @Override
     public void downloadURL(URL url, int index) {
-        // Add image page to threadpool to grab the image & download it
-        FlickrImageThread mit = new FlickrImageThread(url, index);
-        flickrThreadPool.addThread(mit);
-    }
-
-    /**
-     * Login to Flickr.
-     * @return Cookies for logged-in session
-     * @throws IOException
-     */
-    @SuppressWarnings("unused")
-    private Map<String, String> signinToFlickr() throws IOException {
-        Response resp = Jsoup.connect("http://www.flickr.com/signin/")
-                .userAgent(USER_AGENT)
-                .followRedirects(true)
-                .method(Method.GET)
-                .execute();
-        Document doc = resp.parse();
-        Map<String, String> postData = new HashMap<>();
-        for (Element input : doc.select("input[type=hidden]")) {
-            postData.put(input.attr("name"), input.attr("value"));
-        }
-        postData.put("passwd_raw", "");
-        postData.put(".save", "");
-        postData.put("login", new String(Base64.decode("bGVmYWtlZGVmYWtl")));
-        postData.put("passwd", new String(Base64.decode("MUZha2V5ZmFrZQ==")));
-        String action = doc.select("form[method=post]").get(0).attr("action");
-        resp = Jsoup.connect(action)
-                .cookies(resp.cookies())
-                .data(postData)
-                .method(Method.POST)
-                .execute();
-        return resp.cookies();
-    }
-
-    /**
-     * Helper class to find and download images found on "image" pages
-     */
-    private class FlickrImageThread extends Thread {
-        private URL url;
-        private int index;
-
-        FlickrImageThread(URL url, int index) {
-            super();
-            this.url = url;
-            this.index = index;
-        }
-
-        @Override
-        public void run() {
-            try {
-                Document doc = getLargestImagePageDocument(this.url);
-                Elements fullsizeImages = doc.select("div#allsizes-photo img");
-                if (fullsizeImages.isEmpty()) {
-                    LOGGER.error("Could not find flickr image at " + doc.location() + " - missing 'div#allsizes-photo img'");
-                }
-                else {
-                    String prefix = "";
-                    if (Utils.getConfigBoolean("download.save_order", true)) {
-                        prefix = String.format("%03d_", index);
-                    }
-                    synchronized (flickrThreadPool) {
-                        addURLToDownload(new URL(fullsizeImages.first().attr("src")), prefix);
-                    }
-                }
-            } catch (IOException e) {
-                LOGGER.error("[!] Exception while loading/parsing " + this.url, e);
-            }
-        }
-
-        private Document getLargestImagePageDocument(URL url) throws IOException {
-            // Get current page
-            Document doc = Http.url(url).get();
-            // Look for larger image page
-            String largestImagePage = this.url.toExternalForm();
-            for (Element olSize : doc.select("ol.sizes-list > li > ol > li")) {
-                Elements ola = olSize.select("a");
-                if (ola.isEmpty()) {
-                    largestImagePage = this.url.toExternalForm();
-                }
-                else {
-                    String candImage = ola.get(0).attr("href");
-                    if (candImage.startsWith("/")) {
-                        candImage = "http://www.flickr.com" + candImage;
-                    }
-                    largestImagePage = candImage;
-                }
-            }
-            if (!largestImagePage.equals(this.url.toExternalForm())) {
-                // Found larger image page, get it.
-                doc = Http.url(largestImagePage).get();
-            }
-            return doc;
-        }
+        addURLToDownload(url, getPrefix(index));
     }
 }
\ No newline at end of file
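
[Annotation — not part of the series. PATCH 1/7 swaps the old per-image page scraping for flickr's own JSON API, keyed by an api key scraped out of the site's inline javascript. That scraping step reduces to the standalone sketch below; the HTML body and the key value are invented for illustration, while the script selector, the .html()-over-.text() detail, and the regex are taken from the patch itself.]

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

public class ApiKeyScrapeSketch {
    public static void main(String[] args) {
        // Hypothetical page body; flickr embeds the real key in an inline <script>
        String html = "<html><head><script>"
                + "root.YUI_config.flickr.api.site_key = \"abc123def456\";"
                + "</script></head><body></body></html>";
        Document doc = Jsoup.parse(html);
        Pattern p = Pattern.compile("root.YUI_config.flickr.api.site_key = \"([a-zA-Z0-9]*)\";");
        for (Element e : doc.select("script")) {
            // .html() keeps the script body; .text() would strip most of the javascript
            Matcher m = p.matcher(e.html());
            if (m.find()) {
                System.out.println("Found api key: " + m.group(1));
                return;
            }
        }
        System.out.println("No key found; the ripper falls back to a hardcoded one");
    }
}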
From b685b087aa782e1df3919406934e66cff51c2856 Mon Sep 17 00:00:00 2001
From: cyian-1756
Date: Wed, 14 Nov 2018 21:58:00 -0500
Subject: [PATCH 2/7] Added some comments; ripper can now rip from more than one page

---
 .../ripme/ripper/rippers/FlickrRipper.java | 55 +++++--------------
 1 file changed, 15 insertions(+), 40 deletions(-)

diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java
index 2171e9dd..6f193f7d 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java
@@ -19,8 +19,6 @@ import org.jsoup.nodes.Element;
 
 public class FlickrRipper extends AbstractHTMLRipper {
 
-    private int page = 1;
-    private Set<String> attempted = new HashSet<>();
     private Document albumDoc = null;
     private final DownloadThreadPool flickrThreadPool;
     @Override
@@ -61,9 +59,11 @@ public class FlickrRipper extends AbstractHTMLRipper {
         }
         return new URL(sUrl);
     }
-
+    // Flickr is one of those sites that includes an api key in the site's javascript
+    // TODO let the user provide their own api key
     private String getAPIKey(Document doc) {
-        Pattern p; Matcher m;
+        Pattern p;
+        Matcher m;
         p = Pattern.compile("root.YUI_config.flickr.api.site_key = \"([a-zA-Z0-9]*)\";");
         for (Element e : doc.select("script")) {
             // You have to use .html here as .text will strip most of the javascript
@@ -204,38 +204,6 @@ public class FlickrRipper extends AbstractHTMLRipper {
         return albumDoc;
     }
 
-//    @Override
-//    public Document getNextPage(Document doc) throws IOException {
-//        if (isThisATest()) {
-//            return null;
-//        }
-//        // Find how many pages there are
-//        int lastPage = 0;
-//        for (Element apage : doc.select("a[data-track^=page-]")) {
-//            String lastPageStr = apage.attr("data-track").replace("page-", "");
-//            lastPage = Integer.parseInt(lastPageStr);
-//        }
-//        // If we're at the last page, stop.
-//        if (page >= lastPage) {
-//            throw new IOException("No more pages");
-//        }
-//        // Load the next page
-//        page++;
-//        albumDoc = null;
-//        String nextURL = this.url.toExternalForm();
-//        if (!nextURL.endsWith("/")) {
-//            nextURL += "/";
-//        }
-//        nextURL += "page" + page + "/";
-//        // Wait a bit
-//        try {
-//            Thread.sleep(1000);
-//        } catch (InterruptedException e) {
-//            throw new IOException("Interrupted while waiting to load next page " + nextURL);
-//        }
-//        return Http.url(nextURL).get();
-//    }
-
     @Override
     public List<String> getURLsFromPage(Document doc) {
         List<String> imageURLs = new ArrayList<>();
@@ -246,13 +214,14 @@ public class FlickrRipper extends AbstractHTMLRipper {
             if (jsonData.has("stat") && jsonData.getString("stat").equals("fail")) {
                 break;
             } else {
+                int totalPages = jsonData.getJSONObject("photoset").getInt("pages");
                 LOGGER.info(jsonData);
                 JSONArray pictures = jsonData.getJSONObject("photoset").getJSONArray("photo");
                 for (int i = 0; i < pictures.length(); i++) {
                     LOGGER.info(i);
                     JSONObject data = (JSONObject) pictures.get(i);
-                    // flickr has a real funny way of listing the image sizes, so we have to loop over all these until we
-                    // find one that works
+                    // TODO this is a total hack, we should loop over all image sizes and pick the biggest one and not
+                    // just assume
                     List<String> imageSizes = Arrays.asList("k", "h", "l", "n", "c", "z", "t");
                     for (String imageSize : imageSizes) {
                         try {
@@ -260,11 +229,17 @@ public class FlickrRipper extends AbstractHTMLRipper {
                             addURLToDownload(new URL(data.getString("url_" + imageSize)));
                             LOGGER.info("Adding picture " + data.getString("url_" + imageSize));
                             break;
                         } catch (org.json.JSONException ignore) {
-
+                            // TODO warn the user when we hit a malformed URL
                         } catch (MalformedURLException e) {}
                     }
                 }
-                break;
+                if (x >= totalPages) {
+                    // The rip's done
+                    break;
+                }
+                // We have more pages to download so we rerun the loop
+                x++;
+
             }
         }
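
[Annotation — not part of the series. After PATCH 2/7, each photo entry returned by flickr.photosets.getPhotos is tried against the size suffixes k, h, l, n, c, z, t in order, and the first rendition present wins. A minimal sketch of that fallback against a made-up photo object; it uses JSONObject.has() where the patch relies on catching the JSONException thrown by getString():]

import java.util.Arrays;
import java.util.List;

import org.json.JSONObject;

public class FlickrSizeFallbackSketch {
    public static void main(String[] args) {
        // Hypothetical entry from photoset.photo: this one only carries the
        // large ("l") and thumbnail ("t") renditions
        JSONObject data = new JSONObject()
                .put("url_l", "https://live.staticflickr.com/1/2_b.jpg")
                .put("url_t", "https://live.staticflickr.com/1/2_t.jpg");
        // Same ordering as the patch: biggest ("k") first, smallest ("t") last
        List<String> imageSizes = Arrays.asList("k", "h", "l", "n", "c", "z", "t");
        for (String imageSize : imageSizes) {
            if (data.has("url_" + imageSize)) {
                System.out.println("Would download " + data.getString("url_" + imageSize));
                break; // first hit is the biggest size this photo offers
            }
        }
    }
}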
iframe").attr("src"); + + if (videoPageUrl.contains("mydaddy")) { + videoUrl = getVideoFromMyDaddycc(videoPageUrl); + } else if (videoPageUrl.contains("flyflv")) { + videoUrl = getVideoFromFlyFlv(videoPageUrl); + } + + if (videoUrl != null) { + result.add("https:" + videoUrl); + } return result; } From 968021c2767cefc1798bd332a4944250d086a86b Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Wed, 21 Nov 2018 05:35:26 -0500 Subject: [PATCH 4/7] 1.7.70: Added arabic translation; Updater now works on java 10; Fixed mangadex ripper --- pom.xml | 2 +- ripme.json | 5 +++-- src/main/java/com/rarchives/ripme/ui/UpdateUtils.java | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/pom.xml b/pom.xml index 0131181e..c51df4c0 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ com.rarchives.ripme ripme jar - 1.7.69 + 1.7.70 ripme http://rip.rarchives.com diff --git a/ripme.json b/ripme.json index d6d6a9a7..93de460c 100644 --- a/ripme.json +++ b/ripme.json @@ -1,6 +1,6 @@ { - "latestVersion": "1.7.69", "changeList": [ + "1.7.70: Added arabic translation; Updater now works on java 10; Fixed mangadex ripper", "1.7.69: Fixes TheChive bug so that it can now rip gifs; e621 ripper now rips all media types; Upgraded org.apache.httpcomponents to 4.3.6; Added ripper for Mangadex.org; Added ripper for various duckmovie frontends; reddit ripper no longer freezes when ripping certain links", "1.7.68: Added support for 55chan.org; Now limits file name length to 255 chars; fixed Tsumino ripper", "1.7.67: Added yuki.la ripper; Fixed xhamster ripper; Fixed instagram ripper; Added porncomix.one ripper; Fixed bug which caused large files to be download when running tests", @@ -241,5 +241,6 @@ "1.0.3: Added VK.com ripper", "1.0.1: Added auto-update functionality" ], - "currentHash": "5c312c50aed4a33112d3c77cf9cae68be1793b167ba2a741c33453e556a66c73" + "latestVersion": "1.7.70", + "currentHash": "d838bc3a6ed86bb422dd53dbd58f11e28001cd844dc1f2fdee98fe004d1bc237" } \ No newline at end of file diff --git a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java index c76fd959..ccb92047 100644 --- a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java +++ b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java @@ -20,7 +20,7 @@ import com.rarchives.ripme.utils.Utils; public class UpdateUtils { private static final Logger logger = Logger.getLogger(UpdateUtils.class); - private static final String DEFAULT_VERSION = "1.7.69"; + private static final String DEFAULT_VERSION = "1.7.70"; private static final String REPO_NAME = "ripmeapp/ripme"; private static final String updateJsonURL = "https://raw.githubusercontent.com/" + REPO_NAME + "/master/ripme.json"; private static final String mainFileName = "ripme.jar"; From f4948475e6941af5e1ae9875094436a3149d5a93 Mon Sep 17 00:00:00 2001 From: Fenris95 Date: Thu, 22 Nov 2018 13:53:50 +0000 Subject: [PATCH 5/7] Added 4Channel.org for SFW 4Chan Boards "Attention: All work safe boards are soon going to be on the 4channel.org domain." 
From f4948475e6941af5e1ae9875094436a3149d5a93 Mon Sep 17 00:00:00 2001
From: Fenris95
Date: Thu, 22 Nov 2018 13:53:50 +0000
Subject: [PATCH 5/7] Added 4Channel.org for SFW 4Chan Boards

"Attention: All work safe boards are soon going to be on the 4channel.org
domain."
---
 src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java
index f44aab43..e6456012 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ChanRipper.java
@@ -18,6 +18,7 @@ import org.jsoup.nodes.Element;
 public class ChanRipper extends AbstractHTMLRipper {
     private static List<ChanSite> explicit_domains = Arrays.asList(
             new ChanSite("boards.4chan.org", Arrays.asList("4cdn.org", "is.4chan.org", "is2.4chan.org", "is3.4chan.org")),
+            new ChanSite("boards.4channel.org", Arrays.asList("4cdn.org", "is.4chan.org", "is2.4chan.org", "is3.4chan.org")),
             new ChanSite("4archive.org", "imgur.com"),
             new ChanSite("archive.4plebs.org", "img.4plebs.org"),
             new ChanSite("yuki.la", "ii.yuki.la"),

From 78882177ae8c52a80546765525588466e1d274ca Mon Sep 17 00:00:00 2001
From: Sorunome
Date: Thu, 22 Nov 2018 19:11:31 +0100
Subject: [PATCH 6/7] add derpi ripper

---
 .../ripme/ripper/rippers/DerpiRipper.java | 149 ++++++++++++++++++
 1 file changed, 149 insertions(+)
 create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/DerpiRipper.java

diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/DerpiRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/DerpiRipper.java
new file mode 100644
index 00000000..1feaf692
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/DerpiRipper.java
@@ -0,0 +1,149 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import com.rarchives.ripme.ripper.AbstractJSONRipper;
+import com.rarchives.ripme.utils.Http;
+import com.rarchives.ripme.utils.Utils;
+
+import org.json.JSONObject;
+import org.json.JSONArray;
+
+public class DerpiRipper extends AbstractJSONRipper {
+
+    private URL currUrl;
+    private Integer currPage;
+
+    public DerpiRipper(URL url) throws IOException {
+        super(url);
+    }
+
+    private String apiUrl;
+
+    @Override
+    public String getHost() {
+        return "DerpiBooru";
+    }
+
+    @Override
+    public String getDomain() {
+        return "derpibooru.org";
+    }
+
+    @Override
+    public URL sanitizeURL(URL url) throws MalformedURLException {
+        String u = url.toExternalForm();
+        String[] uu = u.split("\\?", 2);
+        String newU = uu[0];
+        if (newU.substring(newU.length() - 1).equals("/")) {
+            newU = newU.substring(0, newU.length() - 1);
+        }
+        newU += ".json?";
+        if (uu.length > 1) {
+            newU += uu[1];
+        }
+
+        String key = Utils.getConfigString("derpi.key", "");
+        if (!key.equals("")) {
+            newU += "&key=" + key;
+        }
+
+        return new URL(newU);
+    }
+
+    @Override
+    public String getGID(URL url) throws MalformedURLException {
+        currUrl = url;
+        currPage = 1;
+
+        // search
+        Pattern p = Pattern.compile("^https?://derpibooru\\.org/search\\.json\\?q=([^&]+).*?$");
+        Matcher m = p.matcher(url.toExternalForm());
+        if (m.matches()) {
+            return "search_" + m.group(1);
+        }
+
+        // tags
+        p = Pattern.compile("^https?://derpibooru\\.org/tags/([^.]+)\\.json.*?$");
+        m = p.matcher(url.toExternalForm());
+        if (m.matches()) {
+            return "tags_" + m.group(1);
+        }
+
+        // galleries
+        p = Pattern.compile("^https?://derpibooru\\.org/galleries/([^/]+)/(\\d+)\\.json.*?$");
+        m = p.matcher(url.toExternalForm());
+        if (m.matches()) {
return "galleries_" + m.group(1) + "_" + m.group(2); + } + + // single image + p = Pattern.compile("^https?://derpibooru\\.org/(\\d+)\\.json.*?$"); + m = p.matcher(url.toExternalForm()); + if (m.matches()) { + return "image_" + m.group(1); + } + + throw new MalformedURLException("Unable to find image in " + url); + } + + @Override + public JSONObject getFirstPage() throws IOException { + return Http.url(url).getJSON(); + } + + @Override + public JSONObject getNextPage(JSONObject doc) throws IOException { + currPage++; + String u = currUrl.toExternalForm() + "&page=" + Integer.toString(currPage); + JSONObject json = Http.url(new URL(u)).getJSON(); + JSONArray arr; + if (json.has("images")) { + arr = json.getJSONArray("images"); + } else if (json.has("search")) { + arr = json.getJSONArray("search"); + } else { + throw new IOException("No more images"); + } + if (arr.length() == 0) { + throw new IOException("No more images"); + } + return json; + } + + private String getImageUrlFromJson(JSONObject json) { + return "https:" + json.getJSONObject("representations").getString("full"); + } + + @Override + public List getURLsFromJSON(JSONObject json) { + List imageURLs = new ArrayList<>(); + + JSONArray arr = null; + if (json.has("images")) { + arr = json.getJSONArray("images"); + } else if (json.has("search")) { + arr = json.getJSONArray("search"); + } + if (arr != null) { + for (int i = 0; i < arr.length(); i++){ + imageURLs.add(this.getImageUrlFromJson(arr.getJSONObject(i))); + } + } else { + imageURLs.add(this.getImageUrlFromJson(json)); + } + return imageURLs; + } + + @Override + public void downloadURL(URL url, int index) { + // we don't set an index prefix here as derpibooru already prefixes their images with their unique IDs + addURLToDownload(url, ""); + } +} From a4117133f8c66367a04c77470d1287dba26abafc Mon Sep 17 00:00:00 2001 From: PwnicornDev <3987812+PwnicornDev@users.noreply.github.com> Date: Fri, 23 Nov 2018 21:30:55 +0000 Subject: [PATCH 7/7] Update userRegex, fixes #1063 --- .../com/rarchives/ripme/ripper/rippers/FlickrRipper.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java index 6f193f7d..6ad75003 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java @@ -129,7 +129,7 @@ public class FlickrRipper extends AbstractHTMLRipper { // Album: https://www.flickr.com/photos/115858035@N04/sets/72157644042355643/ final String domainRegex = "https?://[wm.]*flickr.com"; - final String userRegex = "[a-zA-Z0-9@]+"; + final String userRegex = "[a-zA-Z0-9@_-]+"; // Album p = Pattern.compile("^" + domainRegex + "/photos/(" + userRegex + ")/(sets|albums)/([0-9]+)/?.*$"); m = p.matcher(url); @@ -168,7 +168,7 @@ public class FlickrRipper extends AbstractHTMLRipper { // Album: https://www.flickr.com/photos/115858035@N04/sets/72157644042355643/ final String domainRegex = "https?://[wm.]*flickr.com"; - final String userRegex = "[a-zA-Z0-9@]+"; + final String userRegex = "[a-zA-Z0-9@_-]+"; // Album p = Pattern.compile("^" + domainRegex + "/photos/(" + userRegex + ")/sets/([0-9]+)/?.*$"); m = p.matcher(url.toExternalForm()); @@ -250,4 +250,4 @@ public class FlickrRipper extends AbstractHTMLRipper { public void downloadURL(URL url, int index) { addURLToDownload(url, getPrefix(index)); } -} \ No newline at end of file +}