mirror of
https://github.com/RipMeApp/ripme.git
synced 2025-08-30 09:10:44 +02:00
@@ -39,9 +39,6 @@ public class InstagramRipper extends AbstractJSONRipper {
|
||||
private String userID;
|
||||
private String rhx_gis = null;
|
||||
private String csrftoken;
|
||||
// Run into a weird issue with Jsoup cutting some json pages in half, this is a work around
|
||||
// see https://github.com/RipMeApp/ripme/issues/601
|
||||
private String workAroundJsonString;
|
||||
|
||||
|
||||
|
||||
@@ -192,6 +189,9 @@ public class InstagramRipper extends AbstractJSONRipper {
|
||||
Document p = resp.parse();
|
||||
// Get the query hash so we can download the next page
|
||||
qHash = getQHash(p);
|
||||
if (qHash == null) {
|
||||
throw new IOException("Unable to extract qhash from page");
|
||||
}
|
||||
return getJSONFromPage(p);
|
||||
}
|
||||
|
||||
@@ -398,7 +398,6 @@ public class InstagramRipper extends AbstractJSONRipper {
|
||||
}
|
||||
|
||||
private boolean pageHasImages(JSONObject json) {
|
||||
LOGGER.info(json);
|
||||
int numberOfImages = json.getJSONObject("data").getJSONObject("user")
|
||||
.getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges").length();
|
||||
if (numberOfImages == 0) {
|
||||
@@ -422,23 +421,36 @@ public class InstagramRipper extends AbstractJSONRipper {
|
||||
|
||||
}
|
||||
in.close();
|
||||
workAroundJsonString = sb.toString();
|
||||
return new JSONObject(sb.toString());
|
||||
|
||||
} catch (MalformedURLException e) {
|
||||
LOGGER.info("Unable to get query_hash, " + url + " is a malformed URL");
|
||||
LOGGER.info("Unable to get page, " + url + " is a malformed URL");
|
||||
return null;
|
||||
} catch (IOException e) {
|
||||
LOGGER.info("Unable to get query_hash");
|
||||
LOGGER.info("Unable to get page");
|
||||
LOGGER.info(e.getMessage());
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private String getQhashUrl(Document doc) {
|
||||
for(Element el : doc.select("link[rel=preload]")) {
|
||||
if (el.attr("href").contains("ProfilePageContainer")) {
|
||||
return el.attr("href");
|
||||
}
|
||||
}
|
||||
for(Element el : doc.select("link[rel=preload]")) {
|
||||
if (el.attr("href").contains("metro")) {
|
||||
return el.attr("href");
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private String getQHash(Document doc) {
|
||||
String jsFileURL = "https://www.instagram.com" + doc.select("link[rel=preload]").attr("href");
|
||||
String jsFileURL = "https://www.instagram.com" + getQhashUrl(doc);
|
||||
StringBuilder sb = new StringBuilder();
|
||||
Document jsPage;
|
||||
LOGGER.info(jsFileURL);
|
||||
try {
|
||||
// We can't use Jsoup here because it won't download a non-html file larger than a MB
|
||||
// even if you set maxBodySize to 0
|
||||
@@ -454,7 +466,7 @@ public class InstagramRipper extends AbstractJSONRipper {
|
||||
LOGGER.info("Unable to get query_hash, " + jsFileURL + " is a malformed URL");
|
||||
return null;
|
||||
} catch (IOException e) {
|
||||
LOGGER.info("Unable to get query_hash");
|
||||
LOGGER.info("Unable to get query_hash from " + jsFileURL);
|
||||
LOGGER.info(e.getMessage());
|
||||
return null;
|
||||
}
|
||||
@@ -468,6 +480,12 @@ public class InstagramRipper extends AbstractJSONRipper {
|
||||
m = jsP.matcher(sb.toString());
|
||||
if (m.find()) {
|
||||
return m.group(1);
|
||||
} else {
|
||||
jsP = Pattern.compile(",u=.([a-zA-Z0-9]+).");
|
||||
m = jsP.matcher(sb.toString());
|
||||
if (m.find()) {
|
||||
return m.group(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -477,6 +495,7 @@ public class InstagramRipper extends AbstractJSONRipper {
|
||||
if (m.find()) {
|
||||
return m.group(1);
|
||||
}
|
||||
|
||||
}
|
||||
LOGGER.error("Could not find query_hash on " + jsFileURL);
|
||||
return null;
|
||||
|
Reference in New Issue
Block a user