Added some comments; ripper can now rip from more than one page

2025-08-29 16:50:10 +02:00 · 2018-11-14 21:58:00 -05:00
parent 0a27fc3089
commit b685b087aa
1 changed files with 15 additions and 40 deletions
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/FlickrRipper.java
@@ -19,8 +19,6 @@ import org.jsoup.nodes.Element;
 public class FlickrRipper extends AbstractHTMLRipper {
    private int page = 1;
    private Set<String> attempted = new HashSet<>();
    private Document albumDoc = null;
    private final DownloadThreadPool flickrThreadPool;
    @Override
@@ -61,9 +59,11 @@ public class FlickrRipper extends AbstractHTMLRipper {
        }
        return new URL(sUrl);
    }
-
+    // Flickr is one of those sites that includes an API key in the site's JavaScript
    // TODO let the user provide their own api key
    private String getAPIKey(Document doc) {
-        Pattern p; Matcher m;
+        Pattern p;
        Matcher m;
        p = Pattern.compile("root.YUI_config.flickr.api.site_key = \"([a-zA-Z0-9]*)\";");
        for (Element e : doc.select("script")) {
            // You have to use .html here as .text will strip most of the javascript
@@ -204,38 +204,6 @@ public class FlickrRipper extends AbstractHTMLRipper {
        return albumDoc;
    }
 //    @Override
 //    public Document getNextPage(Document doc) throws IOException {
 //        if (isThisATest()) {
 //            return null;
 //        }
 //        // Find how many pages there are
 //        int lastPage = 0;
 //        for (Element apage : doc.select("a[data-track^=page-]")) {
 //            String lastPageStr = apage.attr("data-track").replace("page-", "");
 //            lastPage = Integer.parseInt(lastPageStr);
 //        }
 //        // If we're at the last page, stop.
 //        if (page >= lastPage) {
 //            throw new IOException("No more pages");
 //        }
 //        // Load the next page
 //        page++;
 //        albumDoc = null;
 //        String nextURL = this.url.toExternalForm();
 //        if (!nextURL.endsWith("/")) {
 //            nextURL += "/";
 //        }
 //        nextURL += "page" + page + "/";
 //        // Wait a bit
 //        try {
 //            Thread.sleep(1000);
 //        } catch (InterruptedException e) {
 //            throw new IOException("Interrupted while waiting to load next page " + nextURL);
 //        }
 //        return Http.url(nextURL).get();
 //    }
    @Override
    public List<String> getURLsFromPage(Document doc) {
        List<String> imageURLs = new ArrayList<>();
@@ -246,13 +214,14 @@ public class FlickrRipper extends AbstractHTMLRipper {
            if (jsonData.has("stat") && jsonData.getString("stat").equals("fail")) {
                break;
            } else {
                int totalPages = jsonData.getJSONObject("photoset").getInt("pages");
                LOGGER.info(jsonData);
                JSONArray pictures = jsonData.getJSONObject("photoset").getJSONArray("photo");
                for (int i = 0; i < pictures.length(); i++) {
                    LOGGER.info(i);
                    JSONObject data = (JSONObject) pictures.get(i);
-                    // flickr has a real funny way listing the image sizes, so we have to loop over all these until we
+                    // TODO this is a total hack, we should loop over all image sizes and pick the biggest one and not
-                    // find one that works
+                    // just assume the first size in this list that has a URL is the biggest
                    List<String> imageSizes = Arrays.asList("k", "h", "l", "n", "c", "z", "t");
                    for ( String imageSize : imageSizes) {
                        try {
@@ -260,11 +229,17 @@ public class FlickrRipper extends AbstractHTMLRipper {
                            LOGGER.info("Adding picture " + data.getString("url_" + imageSize));
                            break;
                        } catch (org.json.JSONException ignore) {
-
+                        // TODO warn the user when we hit a malformed URL
                        } catch (MalformedURLException e) {}
                    }
                }
-                break;
+                if (x >= totalPages) {
                    // The rip is done
                    break;
                }
                // We have more pages to download so we rerun the loop
                x++;
            }
        }