mirror of
https://github.com/RipMeApp/ripme.git
synced 2025-02-22 15:12:37 +01:00
Added support for ripping entire Flickr profiles
This commit is contained in:
parent
560a417386
commit
15919e906f
@ -10,6 +10,7 @@ import java.util.regex.Pattern;
|
|||||||
import com.rarchives.ripme.ui.RipStatusMessage;
|
import com.rarchives.ripme.ui.RipStatusMessage;
|
||||||
import org.json.JSONArray;
|
import org.json.JSONArray;
|
||||||
import org.json.JSONObject;
|
import org.json.JSONObject;
|
||||||
|
import org.json.JSONException;
|
||||||
import org.jsoup.nodes.Document;
|
import org.jsoup.nodes.Document;
|
||||||
|
|
||||||
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
|
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
|
||||||
@ -21,6 +22,22 @@ public class FlickrRipper extends AbstractHTMLRipper {
|
|||||||
|
|
||||||
private Document albumDoc = null;
|
private Document albumDoc = null;
|
||||||
private final DownloadThreadPool flickrThreadPool;
|
private final DownloadThreadPool flickrThreadPool;
|
||||||
|
|
||||||
|
private enum UrlType {
|
||||||
|
USER,
|
||||||
|
PHOTOSET
|
||||||
|
}
|
||||||
|
|
||||||
|
private class Album {
|
||||||
|
final UrlType type;
|
||||||
|
final String id;
|
||||||
|
|
||||||
|
Album(UrlType type, String id) {
|
||||||
|
this.type = type;
|
||||||
|
this.id = id;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public DownloadThreadPool getThreadPool() {
|
public DownloadThreadPool getThreadPool() {
|
||||||
return flickrThreadPool;
|
return flickrThreadPool;
|
||||||
@ -81,40 +98,44 @@ public class FlickrRipper extends AbstractHTMLRipper {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// The flickr api is a monster of weird settings so we just request everything that the webview does
|
// The flickr api is a monster of weird settings so we just request everything that the webview does
|
||||||
private String apiURLBuilder(String photoset, String pageNumber, String apiKey) {
|
private String apiURLBuilder(Album album, String pageNumber, String apiKey) {
|
||||||
LOGGER.info("https://api.flickr.com/services/rest?extras=can_addmeta," +
|
String method = null;
|
||||||
"can_comment,can_download,can_share,contact,count_comments,count_faves,count_views,date_taken," +
|
String idField = null;
|
||||||
"date_upload,icon_urls_deep,isfavorite,ispro,license,media,needs_interstitial,owner_name," +
|
switch (album.type) {
|
||||||
"owner_datecreate,path_alias,realname,rotation,safety_level,secret_k,secret_h,url_c,url_f,url_h,url_k," +
|
case PHOTOSET:
|
||||||
"url_l,url_m,url_n,url_o,url_q,url_s,url_sq,url_t,url_z,visibility,visibility_source,o_dims," +
|
method = "flickr.photosets.getPhotos";
|
||||||
"is_marketplace_printable,is_marketplace_licensable,publiceditability&per_page=100&page="+ pageNumber + "&" +
|
idField = "photoset_id=" + album.id;
|
||||||
"get_user_info=1&primary_photo_extras=url_c,%20url_h,%20url_k,%20url_l,%20url_m,%20url_n,%20url_o" +
|
break;
|
||||||
",%20url_q,%20url_s,%20url_sq,%20url_t,%20url_z,%20needs_interstitial,%20can_share&jump_to=&" +
|
case USER:
|
||||||
"photoset_id=" + photoset + "&viewerNSID=&method=flickr.photosets.getPhotos&csrf=&" +
|
method = "flickr.people.getPhotos";
|
||||||
"api_key=" + apiKey + "&format=json&hermes=1&hermesClient=1&reqId=358ed6a0&nojsoncallback=1");
|
idField = "user_id=" + album.id;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
return "https://api.flickr.com/services/rest?extras=can_addmeta," +
|
return "https://api.flickr.com/services/rest?extras=can_addmeta," +
|
||||||
"can_comment,can_download,can_share,contact,count_comments,count_faves,count_views,date_taken," +
|
"can_comment,can_download,can_share,contact,count_comments,count_faves,count_views,date_taken," +
|
||||||
"date_upload,icon_urls_deep,isfavorite,ispro,license,media,needs_interstitial,owner_name," +
|
"date_upload,icon_urls_deep,isfavorite,ispro,license,media,needs_interstitial,owner_name," +
|
||||||
"owner_datecreate,path_alias,realname,rotation,safety_level,secret_k,secret_h,url_c,url_f,url_h,url_k," +
|
"owner_datecreate,path_alias,realname,rotation,safety_level,secret_k,secret_h,url_c,url_f,url_h,url_k," +
|
||||||
"url_l,url_m,url_n,url_o,url_q,url_s,url_sq,url_t,url_z,visibility,visibility_source,o_dims," +
|
"url_l,url_m,url_n,url_o,url_q,url_s,url_sq,url_t,url_z,visibility,visibility_source,o_dims," +
|
||||||
"is_marketplace_printable,is_marketplace_licensable,publiceditability&per_page=100&page="+ pageNumber + "&" +
|
"is_marketplace_printable,is_marketplace_licensable,publiceditability&per_page=100&page="+ pageNumber + "&" +
|
||||||
"get_user_info=1&primary_photo_extras=url_c,%20url_h,%20url_k,%20url_l,%20url_m,%20url_n,%20url_o" +
|
"get_user_info=1&primary_photo_extras=url_c,%20url_h,%20url_k,%20url_l,%20url_m,%20url_n,%20url_o" +
|
||||||
",%20url_q,%20url_s,%20url_sq,%20url_t,%20url_z,%20needs_interstitial,%20can_share&jump_to=&" +
|
",%20url_q,%20url_s,%20url_sq,%20url_t,%20url_z,%20needs_interstitial,%20can_share&jump_to=&" +
|
||||||
"photoset_id=" + photoset + "&viewerNSID=&method=flickr.photosets.getPhotos&csrf=&" +
|
idField + "&viewerNSID=&method=" + method + "&csrf=&" +
|
||||||
"api_key=" + apiKey + "&format=json&hermes=1&hermesClient=1&reqId=358ed6a0&nojsoncallback=1";
|
"api_key=" + apiKey + "&format=json&hermes=1&hermesClient=1&reqId=358ed6a0&nojsoncallback=1";
|
||||||
}
|
}
|
||||||
|
|
||||||
private JSONObject getJSON(String page, String apiKey) {
|
private JSONObject getJSON(String page, String apiKey) {
|
||||||
URL pageURL = null;
|
URL pageURL = null;
|
||||||
String apiURL = null;
|
String apiURL = null;
|
||||||
try {
|
try {
|
||||||
apiURL = apiURLBuilder(getPhotosetID(url.toExternalForm()), page, apiKey);
|
apiURL = apiURLBuilder(getAlbum(url.toExternalForm()), page, apiKey);
|
||||||
pageURL = new URL(apiURL);
|
pageURL = new URL(apiURL);
|
||||||
} catch (MalformedURLException e) {
|
} catch (MalformedURLException e) {
|
||||||
LOGGER.error("Unable to get api link " + apiURL + " is malformed");
|
LOGGER.error("Unable to get api link " + apiURL + " is malformed");
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
LOGGER.info(Http.url(pageURL).ignoreContentType().get().text());
|
LOGGER.info("Fetching: " + apiURL);
|
||||||
|
LOGGER.info("Response: " + Http.url(pageURL).ignoreContentType().get().text());
|
||||||
return new JSONObject(Http.url(pageURL).ignoreContentType().get().text());
|
return new JSONObject(Http.url(pageURL).ignoreContentType().get().text());
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
LOGGER.error("Unable to get api link " + apiURL + " is malformed");
|
LOGGER.error("Unable to get api link " + apiURL + " is malformed");
|
||||||
@ -122,21 +143,32 @@ public class FlickrRipper extends AbstractHTMLRipper {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private String getPhotosetID(String url) {
|
private Album getAlbum(String url) throws MalformedURLException {
|
||||||
Pattern p; Matcher m;
|
Pattern p; Matcher m;
|
||||||
|
|
||||||
// Root: https://www.flickr.com/photos/115858035@N04/
|
// User photostream: https://www.flickr.com/photos/115858035@N04/
|
||||||
// Album: https://www.flickr.com/photos/115858035@N04/sets/72157644042355643/
|
// Album: https://www.flickr.com/photos/115858035@N04/sets/72157644042355643/
|
||||||
|
|
||||||
final String domainRegex = "https?://[wm.]*flickr.com";
|
final String domainRegex = "https?://[wm.]*flickr.com";
|
||||||
final String userRegex = "[a-zA-Z0-9@_-]+";
|
final String userRegex = "[a-zA-Z0-9@_-]+";
|
||||||
// Album
|
// Album
|
||||||
p = Pattern.compile("^" + domainRegex + "/photos/(" + userRegex + ")/(sets|albums)/([0-9]+)/?.*$");
|
p = Pattern.compile("^" + domainRegex + "/photos/" + userRegex + "/(sets|albums)/([0-9]+)/?.*$");
|
||||||
m = p.matcher(url);
|
m = p.matcher(url);
|
||||||
if (m.matches()) {
|
if (m.matches()) {
|
||||||
return m.group(3);
|
return new Album(UrlType.PHOTOSET, m.group(2));
|
||||||
}
|
}
|
||||||
return null;
|
|
||||||
|
// User photostream
|
||||||
|
p = Pattern.compile("^" + domainRegex + "/photos/(" + userRegex + ")/?$");
|
||||||
|
m = p.matcher(url);
|
||||||
|
if (m.matches()) {
|
||||||
|
return new Album(UrlType.USER, m.group(1));
|
||||||
|
}
|
||||||
|
|
||||||
|
String errorMessage = "Failed to extract photoset ID from url: " + url;
|
||||||
|
|
||||||
|
LOGGER.error(errorMessage);
|
||||||
|
throw new MalformedURLException(errorMessage);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -144,7 +176,7 @@ public class FlickrRipper extends AbstractHTMLRipper {
|
|||||||
if (!url.toExternalForm().contains("/sets/")) {
|
if (!url.toExternalForm().contains("/sets/")) {
|
||||||
return super.getAlbumTitle(url);
|
return super.getAlbumTitle(url);
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
// Attempt to use album title as GID
|
// Attempt to use album title as GID
|
||||||
Document doc = getFirstPage();
|
Document doc = getFirstPage();
|
||||||
String user = url.toExternalForm();
|
String user = url.toExternalForm();
|
||||||
@ -214,9 +246,23 @@ public class FlickrRipper extends AbstractHTMLRipper {
|
|||||||
if (jsonData.has("stat") && jsonData.getString("stat").equals("fail")) {
|
if (jsonData.has("stat") && jsonData.getString("stat").equals("fail")) {
|
||||||
break;
|
break;
|
||||||
} else {
|
} else {
|
||||||
int totalPages = jsonData.getJSONObject("photoset").getInt("pages");
|
// Determine root key
|
||||||
|
JSONObject rootData;
|
||||||
|
|
||||||
|
try {
|
||||||
|
rootData = jsonData.getJSONObject("photoset");
|
||||||
|
} catch (JSONException e) {
|
||||||
|
try {
|
||||||
|
rootData = jsonData.getJSONObject("photos");
|
||||||
|
} catch (JSONException innerE) {
|
||||||
|
LOGGER.error("Unable to find photos in response");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int totalPages = rootData.getInt("pages");
|
||||||
LOGGER.info(jsonData);
|
LOGGER.info(jsonData);
|
||||||
JSONArray pictures = jsonData.getJSONObject("photoset").getJSONArray("photo");
|
JSONArray pictures = rootData.getJSONArray("photo");
|
||||||
for (int i = 0; i < pictures.length(); i++) {
|
for (int i = 0; i < pictures.length(); i++) {
|
||||||
LOGGER.info(i);
|
LOGGER.info(i);
|
||||||
JSONObject data = (JSONObject) pictures.get(i);
|
JSONObject data = (JSONObject) pictures.get(i);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user