1
0
mirror of https://github.com/RipMeApp/ripme.git synced 2025-08-30 09:10:44 +02:00

Flickr ripper can now rip single pages from photo sets

This commit is contained in:
cyian-1756
2018-11-14 21:05:37 -05:00
parent 4acfb57b9c
commit 0a27fc3089

View File

@@ -3,27 +3,19 @@ package com.rarchives.ripme.ripper.rippers;
import java.io.IOException; import java.io.IOException;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URL; import java.net.URL;
import java.util.ArrayList; import java.util.*;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import org.jsoup.Connection.Method; import com.rarchives.ripme.ui.RipStatusMessage;
import org.jsoup.Connection.Response; import org.json.JSONArray;
import org.jsoup.Jsoup; import org.json.JSONObject;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AbstractHTMLRipper; import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ripper.DownloadThreadPool; import com.rarchives.ripme.ripper.DownloadThreadPool;
import com.rarchives.ripme.utils.Base64;
import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils; import org.jsoup.nodes.Element;
public class FlickrRipper extends AbstractHTMLRipper { public class FlickrRipper extends AbstractHTMLRipper {
@@ -36,6 +28,11 @@ public class FlickrRipper extends AbstractHTMLRipper {
return flickrThreadPool; return flickrThreadPool;
} }
@Override
public boolean hasASAPRipping() {
return true;
}
public FlickrRipper(URL url) throws IOException { public FlickrRipper(URL url) throws IOException {
super(url); super(url);
flickrThreadPool = new DownloadThreadPool(); flickrThreadPool = new DownloadThreadPool();
@@ -50,6 +47,7 @@ public class FlickrRipper extends AbstractHTMLRipper {
return "flickr.com"; return "flickr.com";
} }
@Override
public URL sanitizeURL(URL url) throws MalformedURLException { public URL sanitizeURL(URL url) throws MalformedURLException {
String sUrl = url.toExternalForm(); String sUrl = url.toExternalForm();
// Strip out https // Strip out https
@@ -64,6 +62,84 @@ public class FlickrRipper extends AbstractHTMLRipper {
return new URL(sUrl); return new URL(sUrl);
} }
private String getAPIKey(Document doc) {
Pattern p; Matcher m;
p = Pattern.compile("root.YUI_config.flickr.api.site_key = \"([a-zA-Z0-9]*)\";");
for (Element e : doc.select("script")) {
// You have to use .html here as .text will strip most of the javascript
m = p.matcher(e.html());
if (m.find()) {
LOGGER.info("Found api key:" + m.group(1));
return m.group(1);
}
}
LOGGER.error("Unable to get api key");
// A nice error message to tell our users what went wrong
sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_WARN, "Unable to extract api key from flickr");
sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_WARN, "Using hardcoded api key");
return "935649baf09b2cc50628e2b306e4da5d";
}
// The flickr api is a monster of weird settings so we just request everything that the webview does
private String apiURLBuilder(String photoset, String pageNumber, String apiKey) {
LOGGER.info("https://api.flickr.com/services/rest?extras=can_addmeta," +
"can_comment,can_download,can_share,contact,count_comments,count_faves,count_views,date_taken," +
"date_upload,icon_urls_deep,isfavorite,ispro,license,media,needs_interstitial,owner_name," +
"owner_datecreate,path_alias,realname,rotation,safety_level,secret_k,secret_h,url_c,url_f,url_h,url_k," +
"url_l,url_m,url_n,url_o,url_q,url_s,url_sq,url_t,url_z,visibility,visibility_source,o_dims," +
"is_marketplace_printable,is_marketplace_licensable,publiceditability&per_page=100&page="+ pageNumber + "&" +
"get_user_info=1&primary_photo_extras=url_c,%20url_h,%20url_k,%20url_l,%20url_m,%20url_n,%20url_o" +
",%20url_q,%20url_s,%20url_sq,%20url_t,%20url_z,%20needs_interstitial,%20can_share&jump_to=&" +
"photoset_id=" + photoset + "&viewerNSID=&method=flickr.photosets.getPhotos&csrf=&" +
"api_key=" + apiKey + "&format=json&hermes=1&hermesClient=1&reqId=358ed6a0&nojsoncallback=1");
return "https://api.flickr.com/services/rest?extras=can_addmeta," +
"can_comment,can_download,can_share,contact,count_comments,count_faves,count_views,date_taken," +
"date_upload,icon_urls_deep,isfavorite,ispro,license,media,needs_interstitial,owner_name," +
"owner_datecreate,path_alias,realname,rotation,safety_level,secret_k,secret_h,url_c,url_f,url_h,url_k," +
"url_l,url_m,url_n,url_o,url_q,url_s,url_sq,url_t,url_z,visibility,visibility_source,o_dims," +
"is_marketplace_printable,is_marketplace_licensable,publiceditability&per_page=100&page="+ pageNumber + "&" +
"get_user_info=1&primary_photo_extras=url_c,%20url_h,%20url_k,%20url_l,%20url_m,%20url_n,%20url_o" +
",%20url_q,%20url_s,%20url_sq,%20url_t,%20url_z,%20needs_interstitial,%20can_share&jump_to=&" +
"photoset_id=" + photoset + "&viewerNSID=&method=flickr.photosets.getPhotos&csrf=&" +
"api_key=" + apiKey + "&format=json&hermes=1&hermesClient=1&reqId=358ed6a0&nojsoncallback=1";
}
private JSONObject getJSON(String page, String apiKey) {
URL pageURL = null;
String apiURL = null;
try {
apiURL = apiURLBuilder(getPhotosetID(url.toExternalForm()), page, apiKey);
pageURL = new URL(apiURL);
} catch (MalformedURLException e) {
LOGGER.error("Unable to get api link " + apiURL + " is malformed");
}
try {
LOGGER.info(Http.url(pageURL).ignoreContentType().get().text());
return new JSONObject(Http.url(pageURL).ignoreContentType().get().text());
} catch (IOException e) {
LOGGER.error("Unable to get api link " + apiURL + " is malformed");
return null;
}
}
private String getPhotosetID(String url) {
Pattern p; Matcher m;
// Root: https://www.flickr.com/photos/115858035@N04/
// Album: https://www.flickr.com/photos/115858035@N04/sets/72157644042355643/
final String domainRegex = "https?://[wm.]*flickr.com";
final String userRegex = "[a-zA-Z0-9@]+";
// Album
p = Pattern.compile("^" + domainRegex + "/photos/(" + userRegex + ")/(sets|albums)/([0-9]+)/?.*$");
m = p.matcher(url);
if (m.matches()) {
return m.group(3);
}
return null;
}
@Override
public String getAlbumTitle(URL url) throws MalformedURLException { public String getAlbumTitle(URL url) throws MalformedURLException {
if (!url.toExternalForm().contains("/sets/")) { if (!url.toExternalForm().contains("/sets/")) {
return super.getAlbumTitle(url); return super.getAlbumTitle(url);
@@ -128,168 +204,75 @@ public class FlickrRipper extends AbstractHTMLRipper {
return albumDoc; return albumDoc;
} }
@Override // @Override
public Document getNextPage(Document doc) throws IOException { // public Document getNextPage(Document doc) throws IOException {
if (isThisATest()) { // if (isThisATest()) {
return null; // return null;
} // }
// Find how many pages there are // // Find how many pages there are
int lastPage = 0; // int lastPage = 0;
for (Element apage : doc.select("a[data-track^=page-]")) { // for (Element apage : doc.select("a[data-track^=page-]")) {
String lastPageStr = apage.attr("data-track").replace("page-", ""); // String lastPageStr = apage.attr("data-track").replace("page-", "");
lastPage = Integer.parseInt(lastPageStr); // lastPage = Integer.parseInt(lastPageStr);
} // }
// If we're at the last page, stop. // // If we're at the last page, stop.
if (page >= lastPage) { // if (page >= lastPage) {
throw new IOException("No more pages"); // throw new IOException("No more pages");
} // }
// Load the next page // // Load the next page
page++; // page++;
albumDoc = null; // albumDoc = null;
String nextURL = this.url.toExternalForm(); // String nextURL = this.url.toExternalForm();
if (!nextURL.endsWith("/")) { // if (!nextURL.endsWith("/")) {
nextURL += "/"; // nextURL += "/";
} // }
nextURL += "page" + page + "/"; // nextURL += "page" + page + "/";
// Wait a bit // // Wait a bit
try { // try {
Thread.sleep(1000); // Thread.sleep(1000);
} catch (InterruptedException e) { // } catch (InterruptedException e) {
throw new IOException("Interrupted while waiting to load next page " + nextURL); // throw new IOException("Interrupted while waiting to load next page " + nextURL);
} // }
return Http.url(nextURL).get(); // return Http.url(nextURL).get();
} // }
@Override @Override
public List<String> getURLsFromPage(Document page) { public List<String> getURLsFromPage(Document doc) {
List<String> imageURLs = new ArrayList<>(); List<String> imageURLs = new ArrayList<>();
for (Element thumb : page.select("a[data-track=photo-click]")) {
/* TODO find a way to persist the image title
String imageTitle = null;
if (thumb.hasAttr("title")) {
imageTitle = thumb.attr("title");
}
*/
String imagePage = thumb.attr("href");
if (imagePage.startsWith("/")) {
imagePage = "http://www.flickr.com" + imagePage;
}
if (imagePage.contains("/in/")) {
imagePage = imagePage.substring(0, imagePage.indexOf("/in/") + 1);
}
if (!imagePage.endsWith("/")) {
imagePage += "/";
}
imagePage += "sizes/o/";
// Check for duplicates int x = 1;
if (attempted.contains(imagePage)) { while (true) {
continue; JSONObject jsonData = getJSON(String.valueOf(x), getAPIKey(doc));
} if (jsonData.has("stat") && jsonData.getString("stat").equals("fail")) {
attempted.add(imagePage); break;
imageURLs.add(imagePage); } else {
if (isThisATest()) { LOGGER.info(jsonData);
JSONArray pictures = jsonData.getJSONObject("photoset").getJSONArray("photo");
for (int i = 0; i < pictures.length(); i++) {
LOGGER.info(i);
JSONObject data = (JSONObject) pictures.get(i);
// flickr has a real funny way listing the image sizes, so we have to loop over all these until we
// find one that works
List<String> imageSizes = Arrays.asList("k", "h", "l", "n", "c", "z", "t");
for ( String imageSize : imageSizes) {
try {
addURLToDownload(new URL(data.getString("url_" + imageSize)));
LOGGER.info("Adding picture " + data.getString("url_" + imageSize));
break;
} catch (org.json.JSONException ignore) {
} catch (MalformedURLException e) {}
}
}
break; break;
} }
} }
return imageURLs; return imageURLs;
} }
@Override @Override
public void downloadURL(URL url, int index) { public void downloadURL(URL url, int index) {
// Add image page to threadpool to grab the image & download it addURLToDownload(url, getPrefix(index));
FlickrImageThread mit = new FlickrImageThread(url, index);
flickrThreadPool.addThread(mit);
}
/**
* Login to Flickr.
* @return Cookies for logged-in session
* @throws IOException
*/
@SuppressWarnings("unused")
private Map<String,String> signinToFlickr() throws IOException {
Response resp = Jsoup.connect("http://www.flickr.com/signin/")
.userAgent(USER_AGENT)
.followRedirects(true)
.method(Method.GET)
.execute();
Document doc = resp.parse();
Map<String,String> postData = new HashMap<>();
for (Element input : doc.select("input[type=hidden]")) {
postData.put(input.attr("name"), input.attr("value"));
}
postData.put("passwd_raw", "");
postData.put(".save", "");
postData.put("login", new String(Base64.decode("bGVmYWtlZGVmYWtl")));
postData.put("passwd", new String(Base64.decode("MUZha2V5ZmFrZQ==")));
String action = doc.select("form[method=post]").get(0).attr("action");
resp = Jsoup.connect(action)
.cookies(resp.cookies())
.data(postData)
.method(Method.POST)
.execute();
return resp.cookies();
}
/**
* Helper class to find and download images found on "image" pages
*/
private class FlickrImageThread extends Thread {
private URL url;
private int index;
FlickrImageThread(URL url, int index) {
super();
this.url = url;
this.index = index;
}
@Override
public void run() {
try {
Document doc = getLargestImagePageDocument(this.url);
Elements fullsizeImages = doc.select("div#allsizes-photo img");
if (fullsizeImages.isEmpty()) {
LOGGER.error("Could not find flickr image at " + doc.location() + " - missing 'div#allsizes-photo img'");
}
else {
String prefix = "";
if (Utils.getConfigBoolean("download.save_order", true)) {
prefix = String.format("%03d_", index);
}
synchronized (flickrThreadPool) {
addURLToDownload(new URL(fullsizeImages.first().attr("src")), prefix);
}
}
} catch (IOException e) {
LOGGER.error("[!] Exception while loading/parsing " + this.url, e);
}
}
private Document getLargestImagePageDocument(URL url) throws IOException {
// Get current page
Document doc = Http.url(url).get();
// Look for larger image page
String largestImagePage = this.url.toExternalForm();
for (Element olSize : doc.select("ol.sizes-list > li > ol > li")) {
Elements ola = olSize.select("a");
if (ola.isEmpty()) {
largestImagePage = this.url.toExternalForm();
}
else {
String candImage = ola.get(0).attr("href");
if (candImage.startsWith("/")) {
candImage = "http://www.flickr.com" + candImage;
}
largestImagePage = candImage;
}
}
if (!largestImagePage.equals(this.url.toExternalForm())) {
// Found larger image page, get it.
doc = Http.url(largestImagePage).get();
}
return doc;
}
} }
} }