diff --git a/pom.xml b/pom.xml
index ee7f3d69..e979e2dc 100644
--- a/pom.xml
+++ b/pom.xml
@@ -4,7 +4,7 @@
     <groupId>com.rarchives.ripme</groupId>
    <artifactId>ripme</artifactId>
    <packaging>jar</packaging>
-    <version>1.7.84</version>
+    <version>1.7.85</version>
    <name>ripme</name>
    <url>http://rip.rarchives.com</url>
diff --git a/ripme.json b/ripme.json
index e722dac6..429aa562 100644
--- a/ripme.json
+++ b/ripme.json
@@ -1,7 +1,7 @@
 {
-  "currentHash": "ff04585ca5a2d136174b959cf7652fd4149feceaf4071ae57f25b50d607d7370",
-  "latestVersion": "1.7.84",
+  "latestVersion": "1.7.85",
   "changeList": [
+    "1.7.85: Fixed instagram ripper; Flickr ripper now downloads largest image",
     "1.7.84: Fixed instagram ripper; xhamster ripper now accepts urls with page numbers; Fixed Deviantart Ripper",
     "1.7.83: Added a ripper for hentaifox.com; Added ripper for Erofus.com; Fixed fsktr not ripping some images; Added support for Gfycat profiles; Added opt to disable prefix for HentaifoundryRipper ",
     "1.7.82: Hentai foundry now rips oldest first by default; 8muses ripper no longer makes unneeded requests; Added support for i.thechive.com",
@@ -256,5 +256,6 @@
     "1.0.4: Fixed spaces-in-directory bug",
     "1.0.3: Added VK.com ripper",
     "1.0.1: Added auto-update functionality"
-  ]
+  ],
+  "currentHash": "874aceffdad02ab8147b588641229a9743b8e78b3681b3ff5a733cbd2faa9009"
 }
\ No newline at end of file
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java
index 6ad75003..e56cb4a1 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java
@@ -207,10 +207,10 @@ public class FlickrRipper extends AbstractHTMLRipper {
     @Override
     public List<String> getURLsFromPage(Document doc) {
         List<String> imageURLs = new ArrayList<>();
-
+        String apiKey = getAPIKey(doc);
         int x = 1;
         while (true) {
-            JSONObject jsonData = getJSON(String.valueOf(x), getAPIKey(doc));
+            JSONObject jsonData = getJSON(String.valueOf(x), apiKey);
             if (jsonData.has("stat") && jsonData.getString("stat").equals("fail")) {
                 break;
             } else {
@@ -220,18 +220,12 @@ public class FlickrRipper extends AbstractHTMLRipper {
                 for (int i = 0; i < pictures.length(); i++) {
                     LOGGER.info(i);
                     JSONObject data = (JSONObject) pictures.get(i);
-                    // TODO this is a total hack, we should loop over all image sizes and pick the biggest one and not
-                    // just assume
-                    List<String> imageSizes = Arrays.asList("k", "h", "l", "n", "c", "z", "t");
-                    for ( String imageSize : imageSizes) {
-                        try {
-                            addURLToDownload(new URL(data.getString("url_" + imageSize)));
-                            LOGGER.info("Adding picture " + data.getString("url_" + imageSize));
-                            break;
-                        } catch (org.json.JSONException ignore) {
-                            // TODO warn the user when we hit a Malformed url
-                        } catch (MalformedURLException e) {}
+                    try {
+                        addURLToDownload(getLargestImageURL(data.getString("id"), apiKey));
+                    } catch (MalformedURLException e) {
+                        LOGGER.error("Flickr MalformedURLException: " + e.getMessage());
                     }
+
                 }
                 if (x >= totalPages) {
                     // The rips done
@@ -250,4 +244,30 @@
     public void downloadURL(URL url, int index) {
         addURLToDownload(url, getPrefix(index));
     }
+
+    private URL getLargestImageURL(String imageID, String apiKey) throws MalformedURLException {
+        TreeMap<Integer, String> imageURLMap = new TreeMap<>();
+
+        try {
+            URL imageAPIURL = new URL("https://www.flickr.com/services/rest/?method=flickr.photos.getSizes&api_key=" + apiKey + "&photo_id=" + imageID + "&format=json&nojsoncallback=1");
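+            // Rough shape of the flickr.photos.getSizes response, per Flickr's public API docs:
+            //   {"sizes": {"size": [{"label": "Large", "width": 1024, "height": 768, "source": "https://...jpg", ...}, ...]}}
+            // Keying the TreeMap by width * height makes lastEntry() below the largest available size.
+            // Note: if the request or parse fails, the map stays empty, lastEntry() returns null, and getValue() will NPE.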
+            JSONArray imageSizes = new JSONObject(Http.url(imageAPIURL).ignoreContentType().get().text()).getJSONObject("sizes").getJSONArray("size");
+            for (int i = 0; i < imageSizes.length(); i++) {
+                JSONObject imageInfo = imageSizes.getJSONObject(i);
+                imageURLMap.put(imageInfo.getInt("width") * imageInfo.getInt("height"), imageInfo.getString("source"));
+            }
+
+        } catch (org.json.JSONException e) {
+            LOGGER.error("Error in parsing of Flickr API: " + e.getMessage());
+        } catch (MalformedURLException e) {
+            LOGGER.error("Malformed URL returned by API");
+        } catch (IOException e) {
+            LOGGER.error("IOException while looking at image sizes: " + e.getMessage());
+        }
+
+        return new URL(imageURLMap.lastEntry().getValue());
+    }
 }
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/MeituriRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/MeituriRipper.java
new file mode 100644
index 00000000..4e39a985
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/MeituriRipper.java
@@ -0,0 +1,92 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.utils.Http;
+
+public class MeituriRipper extends AbstractHTMLRipper {
+    public MeituriRipper(URL url) throws IOException {
+        super(url);
+    }
+
+    @Override
+    public String getHost() {
+        return "meituri";
+    }
+
+    @Override
+    public String getDomain() {
+        return "meituri.com";
+    }
+
+    // To use in getting URLs
+    String albumID = "";
+
+    @Override
+    public String getGID(URL url) throws MalformedURLException {
+        // without escape
+        // ^https?://[w.]*meituri\.com/a/([0-9]+)/([0-9\.html]+)*$
+        // https://www.meituri.com/a/14449/
+        // also matches https://www.meituri.com/a/14449/3.html etc.
+        // group 1 is 14449
+        Pattern p = Pattern.compile("^https?://[w.]*meituri\\.com/a/([0-9]+)/([0-9\\.html]+)*$");
+        Matcher m = p.matcher(url.toExternalForm());
+        if (m.matches()) {
+            albumID = m.group(1);
+            return m.group(1);
+        }
+        throw new MalformedURLException(
+                "Expected meituri.com URL format: " + "meituri.com/a/albumid/ - got " + url + " instead");
+    }
+
+    @Override
+    public Document getFirstPage() throws IOException {
+        return Http.url(url).get();
+    }
+
+    @Override
+    public List<String> getURLsFromPage(Document doc) {
+        List<String> imageURLs = new ArrayList<>();
+        // Get number of images from the page
+        // Then generate links according to that
+        String numOfImages = "";
+        // A very ugly way of getting "图片数量: 55P" from paragraphs
+        // 3rd p in div.tuji
+        int n = 0;
+        for (Element para : doc.select("div.tuji > p")) {
+            // 图片数量: 55P
+            if (n == 2) {
+                numOfImages = para.toString();
+            }
+            n++;
+        }
+        // ["<p>图片数量:", "55P</p>"]
+        String[] splitNumOfImages = numOfImages.split(" ");
+        // "55P</p>" -> "55" -> 55
+        int actualNumOfImages = Integer.parseInt(splitNumOfImages[1].replace("P</p>", ""));
+
+        // Base URL: http://ii.hywly.com/a/1/albumid/imgnum.jpg
+        String baseURL = "http://ii.hywly.com/a/1/" + albumID + "/";
+
+        // Loop through and add images to the URL list
+        for (int i = 1; i <= actualNumOfImages; i++) {
+            imageURLs.add(baseURL + i + ".jpg");
+        }
+        return imageURLs;
+    }
+
+    @Override
+    public void downloadURL(URL url, int index) {
+        addURLToDownload(url, getPrefix(index));
+    }
+}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/NewgroundsRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/NewgroundsRipper.java
new file mode 100644
index 00000000..b3ededc4
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/NewgroundsRipper.java
@@ -0,0 +1,140 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.utils.Http;
+import org.jsoup.nodes.Document;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+public class NewgroundsRipper extends AbstractHTMLRipper {
+
+    private String username = ""; // Name of artist
+
+    // Extensions supported by Newgrounds
+    private List<String> ALLOWED_EXTENSIONS = Arrays.asList("png", "gif", "jpeg", "jpg");
+
+    // Images are pulled 60 at a time, a new page request is needed when count == 60
+    private int pageNumber = 1;
+    private int count = 0;
+
+
+    public NewgroundsRipper(URL url) throws IOException {
+        super(url);
+    }
+
+    @Override
+    public String getHost() {
+        return "newgrounds";
+    }
+
+    @Override
+    protected String getDomain() {
+        return "newgrounds.com";
+    }
+
+    @Override
+    public String getGID(URL url) throws MalformedURLException {
+        Pattern p = Pattern.compile("^https?://(.+).newgrounds.com/?.*");
+        Matcher m = p.matcher(url.toExternalForm());
+        if (m.matches()) {
+            this.username = m.group(1);
+            return m.group(1);
+        }
+        throw new MalformedURLException("Expected newgrounds.com URL format: "
+                + "username.newgrounds.com/art - got " + url + " instead");
+    }
+
+    @Override
+    protected Document getFirstPage() throws IOException {
+        return Http.url("https://" + this.username + ".newgrounds.com/art").get();
+    }
+
+    @Override
+    public Document getNextPage(Document doc) throws IOException {
+        if(this.count < 60) {
+            throw new IOException("No more pages");
+        }
+        this.count = 0; // New page found so reset count
+        return Http.url("https://" + this.username + ".newgrounds.com/art/page/" + this.pageNumber)
+                .header("X-Requested-With", "XMLHttpRequest").get(); // Send header to imitate scrolling
+    }
+
+    @Override
+    protected List<String> getURLsFromPage(Document page) {
+
+        List<String> imageURLs = new ArrayList<>();
+        String documentHTMLString = page.toString().replaceAll("&quot;", "");
+        String findStr = "newgrounds.com\\/art\\/view\\/" + this.username;
+        int lastIndex = 0;
+
+        // Index where findStr is found; each occurrence contains the link to an image
+        ArrayList<Integer> indices = new ArrayList<>();
+
+        while(lastIndex != -1){
+            lastIndex = documentHTMLString.indexOf(findStr, lastIndex);
+            if(lastIndex != -1){
+                this.count ++;
+                lastIndex += findStr.length();
+                indices.add(lastIndex);
+            }
+        }
+
+        // Retrieve direct URL for image
+        for(int i = 0; i < indices.size(); i++){
+            String imageUrl = "https://art.ngfiles.com/images/";
+
+            String inLink = "https://www.newgrounds.com/art/view/" + this.username + "/";
+            String s;
+            if(i == indices.size() - 1){
+                s = documentHTMLString.substring(indices.get(i) + 2);
+            } else{
+                s = documentHTMLString.substring(indices.get(i) + 2, indices.get(i + 1));
+            }
+
+            s = s.replaceAll("\n", "").replaceAll("\t", "")
+                    .replaceAll("\\\\", "");
+
+            Pattern p = Pattern.compile("(.*?)\" class.*/thumbnails/(.*?)/(.*?)\\.");
+            Matcher m = p.matcher(s);
+
+            if (m.lookingAt()) {
+                String testURL = m.group(3) + "_" + this.username + "_" + m.group(1);
+
+                // Open new document to get full sized image
+                try {
+                    Document imagePage = Http.url(inLink + m.group(1)).get();
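+                    // The listing page doesn't reveal the full-size file's extension, so probe
+                    // each allowed extension against the art page's HTML until one matches
+                    // (assumes the page embeds the full-size URL for that extension verbatim).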
+                    for(String extensions: this.ALLOWED_EXTENSIONS){
+                        if(imagePage.toString().contains(testURL + "." + extensions)){
+                            imageUrl += m.group(2) + "/" + m.group(3) + "_" + this.username + "_" + m.group(1) + "." + extensions;
+                            imageURLs.add(imageUrl);
+                            break;
+                        }
+                    }
+
+                } catch (IOException e) {
+                    LOGGER.error("IO Error on trying to check extension: " + inLink + m.group(1));
+                }
+            }
+        }
+        this.pageNumber += 1;
+        return imageURLs;
+    }
+
+    @Override
+    protected void downloadURL(URL url, int index) {
+        addURLToDownload(url, getPrefix(index));
+    }
+}
diff --git a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java
index b86fe867..3e1ea3b6 100644
--- a/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java
+++ b/src/main/java/com/rarchives/ripme/ui/UpdateUtils.java
@@ -23,7 +23,7 @@ import com.rarchives.ripme.utils.Utils;
 public class UpdateUtils {
 
     private static final Logger logger = Logger.getLogger(UpdateUtils.class);
-    private static final String DEFAULT_VERSION = "1.7.84";
+    private static final String DEFAULT_VERSION = "1.7.85";
     private static final String REPO_NAME = "ripmeapp/ripme";
     private static final String updateJsonURL = "https://raw.githubusercontent.com/" + REPO_NAME + "/master/ripme.json";
     private static String mainFileName;
diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/MeituriRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/MeituriRipperTest.java
new file mode 100644
index 00000000..a8505590
--- /dev/null
+++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/MeituriRipperTest.java
@@ -0,0 +1,19 @@
+package com.rarchives.ripme.tst.ripper.rippers;
+
+import java.io.IOException;
+import java.net.URL;
+
+import com.rarchives.ripme.ripper.rippers.MeituriRipper;
+
+public class MeituriRipperTest extends RippersTest {
+    public void testMeituriRip() throws IOException {
+        MeituriRipper ripper = new MeituriRipper(new URL("https://www.meituri.com/a/14449/"));
+        testRipper(ripper);
+    }
+
+    public void testGetGID() throws IOException {
+        URL url = new URL("https://www.meituri.com/a/14449/");
+        MeituriRipper ripper = new MeituriRipper(url);
+        assertEquals("14449", ripper.getGID(url));
+    }
+}
diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/NewgroundsRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/NewgroundsRipperTest.java
new file mode 100644
index 00000000..5486df15
--- /dev/null
+++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/NewgroundsRipperTest.java
@@ -0,0 +1,22 @@
+package com.rarchives.ripme.tst.ripper.rippers;
+
+import com.rarchives.ripme.ripper.rippers.NewgroundsRipper;
+
+import java.io.IOException;
+import java.net.URL;
+
+public class NewgroundsRipperTest extends RippersTest {
+
+    public void testNewgroundsRip() throws IOException {
+        NewgroundsRipper ripper = new NewgroundsRipper(new URL("https://zone-sama.newgrounds.com/art"));
+        testRipper(ripper);
+    }
+
+    public void testGetGID() throws IOException {
+        URL url = new URL("https://zone-sama.newgrounds.com/art");
+        NewgroundsRipper ripper = new NewgroundsRipper(url);
+        assertEquals("zone-sama", ripper.getGID(url));
+    }
+
+
+}