mirror of
https://github.com/RipMeApp/ripme.git
synced 2025-09-01 01:51:56 +02:00
@@ -39,9 +39,6 @@ public class InstagramRipper extends AbstractJSONRipper {
|
|||||||
private String userID;
|
private String userID;
|
||||||
private String rhx_gis = null;
|
private String rhx_gis = null;
|
||||||
private String csrftoken;
|
private String csrftoken;
|
||||||
// Run into a weird issue with Jsoup cutting some json pages in half, this is a work around
|
|
||||||
// see https://github.com/RipMeApp/ripme/issues/601
|
|
||||||
private String workAroundJsonString;
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -192,6 +189,9 @@ public class InstagramRipper extends AbstractJSONRipper {
|
|||||||
Document p = resp.parse();
|
Document p = resp.parse();
|
||||||
// Get the query hash so we can download the next page
|
// Get the query hash so we can download the next page
|
||||||
qHash = getQHash(p);
|
qHash = getQHash(p);
|
||||||
|
if (qHash == null) {
|
||||||
|
throw new IOException("Unable to extract qhash from page");
|
||||||
|
}
|
||||||
return getJSONFromPage(p);
|
return getJSONFromPage(p);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -398,7 +398,6 @@ public class InstagramRipper extends AbstractJSONRipper {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private boolean pageHasImages(JSONObject json) {
|
private boolean pageHasImages(JSONObject json) {
|
||||||
LOGGER.info(json);
|
|
||||||
int numberOfImages = json.getJSONObject("data").getJSONObject("user")
|
int numberOfImages = json.getJSONObject("data").getJSONObject("user")
|
||||||
.getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges").length();
|
.getJSONObject("edge_owner_to_timeline_media").getJSONArray("edges").length();
|
||||||
if (numberOfImages == 0) {
|
if (numberOfImages == 0) {
|
||||||
@@ -422,23 +421,36 @@ public class InstagramRipper extends AbstractJSONRipper {
|
|||||||
|
|
||||||
}
|
}
|
||||||
in.close();
|
in.close();
|
||||||
workAroundJsonString = sb.toString();
|
|
||||||
return new JSONObject(sb.toString());
|
return new JSONObject(sb.toString());
|
||||||
|
|
||||||
} catch (MalformedURLException e) {
|
} catch (MalformedURLException e) {
|
||||||
LOGGER.info("Unable to get query_hash, " + url + " is a malformed URL");
|
LOGGER.info("Unable to get page, " + url + " is a malformed URL");
|
||||||
return null;
|
return null;
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
LOGGER.info("Unable to get query_hash");
|
LOGGER.info("Unable to get page");
|
||||||
LOGGER.info(e.getMessage());
|
LOGGER.info(e.getMessage());
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private String getQhashUrl(Document doc) {
|
||||||
|
for(Element el : doc.select("link[rel=preload]")) {
|
||||||
|
if (el.attr("href").contains("ProfilePageContainer")) {
|
||||||
|
return el.attr("href");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for(Element el : doc.select("link[rel=preload]")) {
|
||||||
|
if (el.attr("href").contains("metro")) {
|
||||||
|
return el.attr("href");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
private String getQHash(Document doc) {
|
private String getQHash(Document doc) {
|
||||||
String jsFileURL = "https://www.instagram.com" + doc.select("link[rel=preload]").attr("href");
|
String jsFileURL = "https://www.instagram.com" + getQhashUrl(doc);
|
||||||
StringBuilder sb = new StringBuilder();
|
StringBuilder sb = new StringBuilder();
|
||||||
Document jsPage;
|
LOGGER.info(jsFileURL);
|
||||||
try {
|
try {
|
||||||
// We can't use Jsoup here because it won't download a non-html file larger than a MB
|
// We can't use Jsoup here because it won't download a non-html file larger than a MB
|
||||||
// even if you set maxBodySize to 0
|
// even if you set maxBodySize to 0
|
||||||
@@ -454,7 +466,7 @@ public class InstagramRipper extends AbstractJSONRipper {
|
|||||||
LOGGER.info("Unable to get query_hash, " + jsFileURL + " is a malformed URL");
|
LOGGER.info("Unable to get query_hash, " + jsFileURL + " is a malformed URL");
|
||||||
return null;
|
return null;
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
LOGGER.info("Unable to get query_hash");
|
LOGGER.info("Unable to get query_hash from " + jsFileURL);
|
||||||
LOGGER.info(e.getMessage());
|
LOGGER.info(e.getMessage());
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
@@ -468,6 +480,12 @@ public class InstagramRipper extends AbstractJSONRipper {
|
|||||||
m = jsP.matcher(sb.toString());
|
m = jsP.matcher(sb.toString());
|
||||||
if (m.find()) {
|
if (m.find()) {
|
||||||
return m.group(1);
|
return m.group(1);
|
||||||
|
} else {
|
||||||
|
jsP = Pattern.compile(",u=.([a-zA-Z0-9]+).");
|
||||||
|
m = jsP.matcher(sb.toString());
|
||||||
|
if (m.find()) {
|
||||||
|
return m.group(1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -477,6 +495,7 @@ public class InstagramRipper extends AbstractJSONRipper {
|
|||||||
if (m.find()) {
|
if (m.find()) {
|
||||||
return m.group(1);
|
return m.group(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
LOGGER.error("Could not find query_hash on " + jsFileURL);
|
LOGGER.error("Could not find query_hash on " + jsFileURL);
|
||||||
return null;
|
return null;
|
||||||
|
Reference in New Issue
Block a user