mirror of
https://github.com/RipMeApp/ripme.git
synced 2025-08-29 16:50:10 +02:00
Added some comments; ripper can now rip from more than one page
This commit is contained in:
@@ -19,8 +19,6 @@ import org.jsoup.nodes.Element;
|
|||||||
|
|
||||||
public class FlickrRipper extends AbstractHTMLRipper {
|
public class FlickrRipper extends AbstractHTMLRipper {
|
||||||
|
|
||||||
private int page = 1;
|
|
||||||
private Set<String> attempted = new HashSet<>();
|
|
||||||
private Document albumDoc = null;
|
private Document albumDoc = null;
|
||||||
private final DownloadThreadPool flickrThreadPool;
|
private final DownloadThreadPool flickrThreadPool;
|
||||||
@Override
|
@Override
|
||||||
@@ -61,9 +59,11 @@ public class FlickrRipper extends AbstractHTMLRipper {
|
|||||||
}
|
}
|
||||||
return new URL(sUrl);
|
return new URL(sUrl);
|
||||||
}
|
}
|
||||||
|
// Flickr is one of those sites that includes an API key in the site's JavaScript
|
||||||
|
// TODO let the user provide their own api key
|
||||||
private String getAPIKey(Document doc) {
|
private String getAPIKey(Document doc) {
|
||||||
Pattern p; Matcher m;
|
Pattern p;
|
||||||
|
Matcher m;
|
||||||
p = Pattern.compile("root.YUI_config.flickr.api.site_key = \"([a-zA-Z0-9]*)\";");
|
p = Pattern.compile("root.YUI_config.flickr.api.site_key = \"([a-zA-Z0-9]*)\";");
|
||||||
for (Element e : doc.select("script")) {
|
for (Element e : doc.select("script")) {
|
||||||
// You have to use .html here as .text will strip most of the javascript
|
// You have to use .html here as .text will strip most of the javascript
|
||||||
@@ -204,38 +204,6 @@ public class FlickrRipper extends AbstractHTMLRipper {
|
|||||||
return albumDoc;
|
return albumDoc;
|
||||||
}
|
}
|
||||||
|
|
||||||
// @Override
|
|
||||||
// public Document getNextPage(Document doc) throws IOException {
|
|
||||||
// if (isThisATest()) {
|
|
||||||
// return null;
|
|
||||||
// }
|
|
||||||
// // Find how many pages there are
|
|
||||||
// int lastPage = 0;
|
|
||||||
// for (Element apage : doc.select("a[data-track^=page-]")) {
|
|
||||||
// String lastPageStr = apage.attr("data-track").replace("page-", "");
|
|
||||||
// lastPage = Integer.parseInt(lastPageStr);
|
|
||||||
// }
|
|
||||||
// // If we're at the last page, stop.
|
|
||||||
// if (page >= lastPage) {
|
|
||||||
// throw new IOException("No more pages");
|
|
||||||
// }
|
|
||||||
// // Load the next page
|
|
||||||
// page++;
|
|
||||||
// albumDoc = null;
|
|
||||||
// String nextURL = this.url.toExternalForm();
|
|
||||||
// if (!nextURL.endsWith("/")) {
|
|
||||||
// nextURL += "/";
|
|
||||||
// }
|
|
||||||
// nextURL += "page" + page + "/";
|
|
||||||
// // Wait a bit
|
|
||||||
// try {
|
|
||||||
// Thread.sleep(1000);
|
|
||||||
// } catch (InterruptedException e) {
|
|
||||||
// throw new IOException("Interrupted while waiting to load next page " + nextURL);
|
|
||||||
// }
|
|
||||||
// return Http.url(nextURL).get();
|
|
||||||
// }
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<String> getURLsFromPage(Document doc) {
|
public List<String> getURLsFromPage(Document doc) {
|
||||||
List<String> imageURLs = new ArrayList<>();
|
List<String> imageURLs = new ArrayList<>();
|
||||||
@@ -246,13 +214,14 @@ public class FlickrRipper extends AbstractHTMLRipper {
|
|||||||
if (jsonData.has("stat") && jsonData.getString("stat").equals("fail")) {
|
if (jsonData.has("stat") && jsonData.getString("stat").equals("fail")) {
|
||||||
break;
|
break;
|
||||||
} else {
|
} else {
|
||||||
|
int totalPages = jsonData.getJSONObject("photoset").getInt("pages");
|
||||||
LOGGER.info(jsonData);
|
LOGGER.info(jsonData);
|
||||||
JSONArray pictures = jsonData.getJSONObject("photoset").getJSONArray("photo");
|
JSONArray pictures = jsonData.getJSONObject("photoset").getJSONArray("photo");
|
||||||
for (int i = 0; i < pictures.length(); i++) {
|
for (int i = 0; i < pictures.length(); i++) {
|
||||||
LOGGER.info(i);
|
LOGGER.info(i);
|
||||||
JSONObject data = (JSONObject) pictures.get(i);
|
JSONObject data = (JSONObject) pictures.get(i);
|
||||||
// Flickr has a really odd way of listing the image sizes, so we have to loop over all of these until we
|
// TODO this is a total hack; we should loop over all image sizes and pick the biggest one and not
|
||||||
// find one that works
|
// just assume
|
||||||
List<String> imageSizes = Arrays.asList("k", "h", "l", "n", "c", "z", "t");
|
List<String> imageSizes = Arrays.asList("k", "h", "l", "n", "c", "z", "t");
|
||||||
for ( String imageSize : imageSizes) {
|
for ( String imageSize : imageSizes) {
|
||||||
try {
|
try {
|
||||||
@@ -260,11 +229,17 @@ public class FlickrRipper extends AbstractHTMLRipper {
|
|||||||
LOGGER.info("Adding picture " + data.getString("url_" + imageSize));
|
LOGGER.info("Adding picture " + data.getString("url_" + imageSize));
|
||||||
break;
|
break;
|
||||||
} catch (org.json.JSONException ignore) {
|
} catch (org.json.JSONException ignore) {
|
||||||
|
// TODO warn the user when we hit a malformed URL
|
||||||
} catch (MalformedURLException e) {}
|
} catch (MalformedURLException e) {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break;
|
if (x >= totalPages) {
|
||||||
|
// The rip is done
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// We have more pages to download so we rerun the loop
|
||||||
|
x++;
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user