Merge pull request #1229 from cyian-1756/ig-fix

Fixed instagram ripper
2025-08-30 09:10:44 +02:00 · 2019-03-02 17:05:56 -05:00
parent 2e2131b98f 323e44db21
commit fb989a4bdf
1 changed files with 29 additions and 10 deletions
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/InstagramRipper.java
@@ -39,9 +39,6 @@ public class InstagramRipper extends AbstractJSONRipper {
    private String userID;
    private String rhx_gis = null;
    private String csrftoken;
-    // Run into a weird issue with Jsoup cutting some json pages in half, this is a work around
-    // see https://github.com/RipMeApp/ripme/issues/601
-    private String workAroundJsonString;



@@ -192,6 +189,9 @@ public class InstagramRipper extends AbstractJSONRipper {
        Document p = resp.parse();
        // Get the query hash so we can download the next page
        qHash = getQHash(p);
+        if (qHash == null) {
+            throw new IOException("Unable to extract qhash from page");
+        }
        return getJSONFromPage(p);
    }

@@ -398,7 +398,6 @@ public class InstagramRipper extends AbstractJSONRipper {
    }

    private boolean pageHasImages(JSONObject json) {
-        LOGGER.info(json);
        int numberOfImages = json.getJSONObject("data").getJSONObject("user")
                .getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges").length();
        if (numberOfImages == 0) {
@@ -422,23 +421,36 @@ public class InstagramRipper extends AbstractJSONRipper {

            }
            in.close();
-            workAroundJsonString = sb.toString();
            return new JSONObject(sb.toString());

        } catch (MalformedURLException e) {
-            LOGGER.info("Unable to get query_hash, " + url + " is a malformed URL");
+            LOGGER.info("Unable to get page, " + url + " is a malformed URL");
            return null;
        } catch (IOException e) {
-            LOGGER.info("Unable to get query_hash");
+            LOGGER.info("Unable to get page");
            LOGGER.info(e.getMessage());
            return null;
        }
    }

+    private String getQhashUrl(Document doc) {
+        for(Element el : doc.select("link[rel=preload]")) {
+            if (el.attr("href").contains("ProfilePageContainer")) {
+                return el.attr("href");
+            }
+        }
+        for(Element el : doc.select("link[rel=preload]")) {
+            if (el.attr("href").contains("metro")) {
+                return el.attr("href");
+            }
+        }
+        return null;
+    }
+
    private String getQHash(Document doc) {
-        String jsFileURL = "https://www.instagram.com" + doc.select("link[rel=preload]").attr("href");
+        String jsFileURL = "https://www.instagram.com" + getQhashUrl(doc);
        StringBuilder sb = new StringBuilder();
-        Document jsPage;
+        LOGGER.info(jsFileURL);
        try {
            // We can't use Jsoup here because it won't download a non-html file larger than a MB
            // even if you set maxBodySize to 0
@@ -454,7 +466,7 @@ public class InstagramRipper extends AbstractJSONRipper {
            LOGGER.info("Unable to get query_hash, " + jsFileURL + " is a malformed URL");
            return null;
        } catch (IOException e) {
-            LOGGER.info("Unable to get query_hash");
+            LOGGER.info("Unable to get query_hash from " + jsFileURL);
            LOGGER.info(e.getMessage());
            return null;
        }
@@ -468,6 +480,12 @@ public class InstagramRipper extends AbstractJSONRipper {
                m = jsP.matcher(sb.toString());
                if (m.find()) {
                    return m.group(1);
+                } else {
+                    jsP = Pattern.compile(",u=.([a-zA-Z0-9]+).");
+                    m = jsP.matcher(sb.toString());
+                    if (m.find()) {
+                        return m.group(1);
+                    }
                }
            }

@@ -477,6 +495,7 @@ public class InstagramRipper extends AbstractJSONRipper {
            if (m.find()) {
                return m.group(1);
            }
+
        }
        LOGGER.error("Could not find query_hash on " + jsFileURL);
        return null;