From 4af469189321057fef03c167f0177d2086d3086c Mon Sep 17 00:00:00 2001
From: cyian-1756
Date: Fri, 9 Mar 2018 13:36:53 -0500
Subject: [PATCH 1/2] lastseen feature now works with instagram

---
Note (not part of the commit message): this two-patch series was recovered
from a copy whose line breaks had been collapsed. Indentation below is
reconstructed, and blank context lines could not be recovered, so the hunks
are not guaranteed to apply as-is.

 .../ripme/ripper/rippers/InstagramRipper.java | 59 ++++++++++---------
 1 file changed, 32 insertions(+), 27 deletions(-)

diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
index d1f16535..364b645c 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
@@ -24,6 +24,7 @@ import com.rarchives.ripme.utils.Utils;
 public class InstagramRipper extends AbstractHTMLRipper {
+    String nextPageID = "";
     private String userID;
@@ -198,7 +199,6 @@ public class InstagramRipper extends AbstractHTMLRipper {
     @Override
     public List getURLsFromPage(Document doc) {
-        String nextPageID = "";
         List imageURLs = new ArrayList<>();
         JSONObject json = new JSONObject();
         try {
@@ -261,33 +261,7 @@ public class InstagramRipper extends AbstractHTMLRipper {
                     break;
                 }
             }
-            // Rip the next page
-            if (!nextPageID.equals("") && !isThisATest()) {
-                if (url.toExternalForm().contains("/tags/")) {
-                    try {
-                        // Sleep for a while to avoid a ban
-                        sleep(2500);
-                        if (url.toExternalForm().substring(url.toExternalForm().length() - 1).equals("/")) {
-                            getURLsFromPage(Http.url(url.toExternalForm() + "?max_id=" + nextPageID).get());
-                        } else {
-                            getURLsFromPage(Http.url(url.toExternalForm() + "/?max_id=" + nextPageID).get());
-                        }
-                    } catch (IOException e) {
-                        return imageURLs;
-                    }
-
-                }
-                try {
-                    // Sleep for a while to avoid a ban
-                    sleep(2500);
-                    getURLsFromPage(Http.url("https://www.instagram.com/" + userID + "/?max_id=" + nextPageID).get());
-                } catch (IOException e) {
-                    return imageURLs;
-                }
-            } else {
-                logger.warn("Can't get net page");
-            }
         } else { // We're ripping from a single page
             logger.info("Ripping from single page");
             imageURLs = getPostsFromSinglePage(doc);
@@ -296,6 +270,37 @@ public class InstagramRipper extends AbstractHTMLRipper {
         return imageURLs;
     }
+    @Override
+    public Document getNextPage(Document doc) {
+        if (!nextPageID.equals("") && !isThisATest()) {
+            if (url.toExternalForm().contains("/tags/")) {
+                try {
+                    // Sleep for a while to avoid a ban
+                    sleep(2500);
+                    if (url.toExternalForm().substring(url.toExternalForm().length() - 1).equals("/")) {
+                        return Http.url(url.toExternalForm() + "?max_id=" + nextPageID).get();
+                    } else {
+                        return Http.url(url.toExternalForm() + "/?max_id=" + nextPageID).get();
+                    }
+
+                } catch (IOException e) {
+                    return null;
+                }
+
+            }
+            try {
+                // Sleep for a while to avoid a ban
+                sleep(2500);
+                return Http.url("https://www.instagram.com/" + userID + "/?max_id=" + nextPageID).get();
+            } catch (IOException e) {
+                return null;
+            }
+        } else {
+            logger.warn("Can't get net page");
+        }
+        return null;
+    }
+
     @Override
     public void downloadURL(URL url, int index) {
         addURLToDownload(url);

From ecf7a4b623605cb071f0e8299c9d51f09461e716 Mon Sep 17 00:00:00 2001
From: cyian-1756
Date: Fri, 9 Mar 2018 18:06:01 -0500
Subject: [PATCH 2/2] IG ripper now no longer errors out on last page

---
Review notes (not part of the commit message):
- In the profile (non-"/tags/") branch the catch block still returns null,
  so the "No more pages" IOException thrown inside that try is swallowed;
  the "/tags/" branch rethrows instead.
- hasImage() only reads entry_data.ProfilePage but is also called for
  "/tags/" pages, and it catches only IOException; confirm how JSON lookup
  failures are meant to be handled there.
- logger.info(toreturn.html()) writes the entire fetched page to the log at
  info level.

 .../ripme/ripper/rippers/InstagramRipper.java | 39 +++++++++++++++----
 1 file changed, 32 insertions(+), 7 deletions(-)

diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
index 364b645c..ab44edfd 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
@@ -271,34 +271,43 @@ public class InstagramRipper extends AbstractHTMLRipper {
     }
     @Override
-    public Document getNextPage(Document doc) {
+    public Document getNextPage(Document doc) throws IOException {
+        Document toreturn;
         if (!nextPageID.equals("") && !isThisATest()) {
             if (url.toExternalForm().contains("/tags/")) {
                 try {
                     // Sleep for a while to avoid a ban
                     sleep(2500);
                     if (url.toExternalForm().substring(url.toExternalForm().length() - 1).equals("/")) {
-                        return Http.url(url.toExternalForm() + "?max_id=" + nextPageID).get();
+                        toreturn = Http.url(url.toExternalForm() + "?max_id=" + nextPageID).get();
                     } else {
-                        return Http.url(url.toExternalForm() + "/?max_id=" + nextPageID).get();
+                        toreturn = Http.url(url.toExternalForm() + "/?max_id=" + nextPageID).get();
                     }
+                    logger.info(toreturn.html());
+                    if (!hasImage(toreturn)) {
+                        throw new IOException("No more pages");
+                    }
+                    return toreturn;
                 } catch (IOException e) {
-                    return null;
+                    throw new IOException("No more pages");
                 }
             }
             try {
                 // Sleep for a while to avoid a ban
                 sleep(2500);
-                return Http.url("https://www.instagram.com/" + userID + "/?max_id=" + nextPageID).get();
+                toreturn = Http.url("https://www.instagram.com/" + userID + "/?max_id=" + nextPageID).get();
+                if (!hasImage(toreturn)) {
+                    throw new IOException("No more pages");
+                }
+                return toreturn;
             } catch (IOException e) {
                 return null;
             }
         } else {
-            logger.warn("Can't get net page");
+            throw new IOException("No more pages");
         }
-        return null;
     }
     @Override
@@ -306,4 +315,20 @@ public class InstagramRipper extends AbstractHTMLRipper {
         addURLToDownload(url);
     }
+    private boolean hasImage(Document doc) {
+        try {
+            JSONObject json = getJSONFromPage(doc);
+            JSONArray profilePage = json.getJSONObject("entry_data").getJSONArray("ProfilePage");
+            JSONArray datas = profilePage.getJSONObject(0).getJSONObject("user").getJSONObject("media").getJSONArray("nodes");
+            logger.info(datas.length());
+            if (datas.length() == 0) {
+                return false;
+            }
+            return true;
+        } catch (IOException e) {
+            return false;
+        }
+
+    }
+
 }