Mirror of https://github.com/RipMeApp/ripme.git (synced 2025-08-15 10:24:04 +02:00)
Instagram ripper renewed
Fixed tag ripping. Added support for TAGGED, IGTV, STORIES, PINNED items. 'sessionid' cookie support for stories and private rips.
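Both new behaviours are driven by RipMe's configuration (read through Utils.getConfigString / Utils.getConfigBoolean in the code below): instagram.download_images_only skips video URLs, and instagram.session_id must hold the value of the sessionid cookie from a logged-in Instagram browser session before stories, pinned highlights, or private accounts can be ripped; the ripper then sends that cookie with every request it makes.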
@@ -1,512 +1,433 @@
InstagramRipper.java as rewritten by this commit (the previous implementation, which parsed window._sharedData, signed requests with rhx_gis / x-instagram-gis, and scraped query hashes from the ProfilePageContainer script, is deleted):

package com.rarchives.ripme.ripper.rippers;

import com.rarchives.ripme.ripper.AbstractJSONRipper;
import com.rarchives.ripme.utils.Http;
import com.rarchives.ripme.utils.Utils;
import org.json.JSONArray;
import org.json.JSONObject;
import org.jsoup.Connection;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.time.Instant;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Spliterators;
import java.util.function.BiFunction;
import java.util.function.Consumer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;

import static java.lang.String.format;

// Available configuration options:
// instagram.download_images_only - use to skip video links
// instagram.session_id - should be set for stories and private accounts (look for sessionid cookie)
public class InstagramRipper extends AbstractJSONRipper {

    private String qHash;
    private Map<String, String> cookies = new HashMap<>();
    private String idString;
    private List<String> itemPrefixes = new ArrayList<>();
    private List<String> failedItems = new ArrayList<>();

    private boolean hashtagRip;
    private boolean taggedRip;
    private boolean igtvRip;
    private boolean postRip;
    private boolean storiesRip;
    private boolean pinnedRip;
    private boolean pinnedReelRip;

    private enum UrlTypePattern {
        // e.g. https://www.instagram.com/explore/tags/rachelc00k/
        HASHTAG("explore/tags/(?<tagname>[^?/]+)"),

        // e.g. https://www.instagram.com/stories/rachelc00k/
        STORIES("stories/(?<username>[^?/]+)"),

        // e.g. https://www.instagram.com/rachelc00k/tagged/
        USER_TAGGED("(?<username>[^?/]+)/tagged"),

        // e.g. https://www.instagram.com/rachelc00k/channel/
        IGTV("(?<username>[^?/]+)/channel"),

        // e.g. https://www.instagram.com/p/Bu4CEfbhNk4/
        SINGLE_POST("(?:p|tv)/(?<shortcode>[^?/]+)"),

        // pseudo-url, e.g. https://www.instagram.com/rachelc00k/?pinned
        PINNED("(?<username>[^?/]+)/?[?]pinned"),

        // e.g. https://www.instagram.com/rachelc00k/
        USER_PROFILE("(?<username>[^?/]+)");

        private final String urlTypePattern;

        UrlTypePattern(String urlTypePattern) {
            this.urlTypePattern = urlTypePattern;
        }
    }

    public InstagramRipper(URL url) throws IOException {
        super(url);
    }

    @Override
    protected String getDomain() {
        return "instagram.com";
    }

    @Override
    public String getHost() {
        return "instagram";
    }

    @Override
    public String getGID(URL url) throws MalformedURLException {
        for (UrlTypePattern urlType : UrlTypePattern.values()) {
            Matcher urlMatcher = getUrlMatcher(url, urlType);
            if (urlMatcher.matches()) {
                switch (urlType) {
                    case HASHTAG:
                        hashtagRip = true;
                        return "tag_" + urlMatcher.group("tagname");
                    case PINNED:
                        pinnedRip = true;
                        return urlMatcher.group("username") + "_pinned";
                    case STORIES:
                        storiesRip = true;
                        return urlMatcher.group("username") + "_stories";
                    case USER_TAGGED:
                        taggedRip = true;
                        return urlMatcher.group("username") + "_tagged";
                    case IGTV:
                        igtvRip = true;
                        return urlMatcher.group("username") + "_igtv";
                    case SINGLE_POST:
                        postRip = true;
                        return "post_" + urlMatcher.group("shortcode");
                    case USER_PROFILE:
                        return urlMatcher.group("username");
                }
            }
        }
        throw new MalformedURLException("This URL can't be ripped");
    }

    private Matcher getUrlMatcher(URL url, UrlTypePattern type) {
        String baseRegex = "^https?://(?:www[.])?instagram[.]com/%s(?:[?/].*)?";
        Pattern pattern = Pattern.compile(format(baseRegex, type.urlTypePattern));
        return pattern.matcher(url.toExternalForm());
    }

    @Override
    public JSONObject getFirstPage() throws IOException {
        setAuthCookie();
        Document document = Http.url(url).cookies(cookies).response().parse();
        qHash = getQhash(document);
        JSONObject jsonObject = getJsonObjectFromDoc(document);
        String hashtagNamePath = "entry_data.TagPage[0].graphql.hashtag.name";
        String singlePostIdPath = "entry_data.PostPage[0].graphql.shortcode_media.shortcode";
        String profileIdPath = "entry_data.ProfilePage[0].graphql.user.id";
        String storiesPath = "entry_data.StoriesPage[0].user.id";
        String idPath = hashtagRip ? hashtagNamePath : storiesRip ? storiesPath : postRip ? singlePostIdPath : profileIdPath;
        idString = getJsonStringByPath(jsonObject, idPath);
        return taggedRip ? getNextPage(null) : pinnedRip ? getPinnedItems(document) : storiesRip ? getStoriesItems() : jsonObject;
    }

    private void setAuthCookie() throws IOException {
        String sessionId = Utils.getConfigString("instagram.session_id", null);
        if ((storiesRip || pinnedRip) && sessionId == null) {
            throw new IOException("instagram.session_id should be set up for Instagram stories");
        }
        if (sessionId != null) {
            cookies.put("sessionid", sessionId);
        }
    }

    // Query hash is used for graphql requests
    private String getQhash(Document doc) throws IOException {
        if (postRip) {
            return null;
        }
        String pinnedRegex = "=\"(?<hash>[0-9a-f]+)\"[^;]+[.]generatePaginationActionCreators";
        String storiesRegex = "=50,h=\"(?<hash>[0-9a-f]+)\"";
        String hashRegex = "%s[^;]+pagination}?,queryId:\"(?<hash>[0-9a-f]+)\"";
        String forHashtag = "tagMedia[.]byTagName";
        String forTagged = "taggedPosts[.]byUserId";
        String forUser = "profilePosts[.]byUserId";
        String href = "";
        Pattern pattern = Pattern.compile(format(hashRegex, forUser));
        for (Element el : doc.select("link[rel=preload]")) {
            href = el.attr("href");
            if ((storiesRip || pinnedReelRip) && href.contains("Consumer.js")) {
                pattern = Pattern.compile(storiesRegex);
                break;
            } else if (href.contains("ProfilePageContainer") || href.contains("TagPageContainer")) {
                pattern = Pattern.compile(pinnedRip ? pinnedRegex :
                        format(hashRegex, hashtagRip ? forHashtag : taggedRip ? forTagged : forUser));
                break;
            }
        }
        Matcher matcher = pattern.matcher(Http.url("https://www.instagram.com" + href).response().body());
        return matcher.find() ? matcher.group("hash") : null;
    }

    private JSONObject getJsonObjectFromDoc(Document document) {
        for (Element script : document.select("script[type=text/javascript]")) {
            String scriptText = script.data();
            if (scriptText.startsWith("window._sharedData")) {
                String jsonText = scriptText.replaceAll("[^{]*([{].*})[^}]*", "$1");
                return new JSONObject(jsonText);
            }
        }
        return null;
    }

    @Override
    public JSONObject getNextPage(JSONObject source) throws IOException {
        if (postRip || storiesRip || pinnedReelRip) {
            return null;
        }
        JSONObject nextPageQuery = new JSONObject().put(hashtagRip ? "tag_name" : "id", idString).put("first", 12);
        if (source == null) {
            return graphqlRequest(nextPageQuery);
        }
        JSONObject pageInfo = getMediaRoot(source).getJSONObject("page_info");
        if (pageInfo.getBoolean("has_next_page")) {
            return graphqlRequest(nextPageQuery.put("after", pageInfo.getString("end_cursor")));
        } else {
            failedItems.forEach(LOGGER::error);
            return null;
        }
    }

    private JSONObject getStoriesItems() throws IOException {
        return graphqlRequest(new JSONObject().append("reel_ids", idString).put("precomposed_overlay", false));
    }

    // Two requests with different query hashes required for pinned items.
    // Query hash to be used depends on flag specified:
    // pinnedRip flag is used initially to get list of pinned albums;
    // pinnedReelRip flag is used next to get media urls.
    private JSONObject getPinnedItems(Document document) throws IOException {
        JSONObject queryForIds = new JSONObject().put("user_id", idString).put("include_highlight_reels", true);
        JSONObject pinnedIdsJson = graphqlRequest(queryForIds);
        JSONArray pinnedItems = getJsonArrayByPath(pinnedIdsJson, "data.user.edge_highlight_reels.edges");
        pinnedRip = false;
        pinnedReelRip = true;
        qHash = getQhash(document);
        JSONObject queryForDetails = new JSONObject();
        getStreamOfJsonArray(pinnedItems)
                .map(object -> getJsonStringByPath(object, "node.id"))
                .forEach(id -> queryForDetails.append("highlight_reel_ids", id));
        queryForDetails.put("precomposed_overlay", false);
        return graphqlRequest(queryForDetails);
    }

    private JSONObject graphqlRequest(JSONObject vars) throws IOException {
        // Sleep for a while to avoid a ban
        sleep(2500);
        String url = format("https://www.instagram.com/graphql/query/?query_hash=%s&variables=%s", qHash, vars.toString());
        return Http.url(url).cookies(cookies).getJSON();
    }

    @Override
    public List<String> getURLsFromJSON(JSONObject json) {
        if (storiesRip || pinnedReelRip) {
            JSONArray storyAlbums = getJsonArrayByPath(json, "data.reels_media");
            return getStreamOfJsonArray(storyAlbums)
                    .flatMap(album -> getStreamOfJsonArray(album.getJSONArray("items")))
                    .peek(storyItem -> itemPrefixes.add(getTimestampPrefix(storyItem)))
                    .flatMap(this::parseStoryItemForUrls)
                    .collect(Collectors.toList());
        }
        if (postRip) {
            JSONObject detailsJson = downloadItemDetailsJson(idString);
            addPrefixInfo(detailsJson);
            return parseItemDetailsForUrls(detailsJson).collect(Collectors.toList());
        }
        JSONArray edges = getMediaRoot(json).getJSONArray("edges");
        return getStreamOfJsonArray(edges)
                .map(edge -> getJsonStringByPath(edge, "node.shortcode"))
                .map(this::downloadItemDetailsJson)
                .filter(Objects::nonNull)
                .peek(this::addPrefixInfo)
                .flatMap(this::parseItemDetailsForUrls)
                .collect(Collectors.toList());
    }

    private Stream<? extends String> parseStoryItemForUrls(JSONObject storyItem) {
        if (storyItem.getBoolean("is_video")) {
            itemPrefixes.add(getTimestampPrefix(storyItem) + "preview_");
            int lastIndex = storyItem.getJSONArray("video_resources").length() - 1;
            return Stream.of(
                    getJsonStringByPath(storyItem, "video_resources[" + lastIndex + "].src"),
                    storyItem.getString("display_url"));
        }
        return Stream.of(storyItem.getString("display_url"));
    }

    private JSONObject getMediaRoot(JSONObject json) {
        String userExtra = "data.user.edge_owner_to_timeline_media";
        String igtvExtra = "data.user.edge_felix_video_timeline";
        String taggedExtra = "data.user.edge_user_to_photos_of_you";
        String hashtagExtra = "data.hashtag.edge_hashtag_to_media";
        String userHomeRoot = "entry_data.ProfilePage[0].graphql.user.edge_owner_to_timeline_media";
        String igtvHomeRoot = "entry_data.ProfilePage[0].graphql.user.edge_felix_video_timeline";
        String hashtagHomeRoot = "entry_data.TagPage[0].graphql.hashtag.edge_hashtag_to_media";
        String mediaRootPath = json.optJSONObject("entry_data") != null ?
                (hashtagRip ? hashtagHomeRoot : igtvRip ? igtvHomeRoot : userHomeRoot) : hashtagRip ?
                hashtagExtra : igtvRip ? igtvExtra : taggedRip ? taggedExtra : userExtra;
        return getJsonObjectByPath(json, mediaRootPath);
    }

    private JSONObject downloadItemDetailsJson(String shortcode) {
        String url = "https://www.instagram.com/p/%s/?__a=1";
        try {
            Http http = Http.url(format(url, shortcode));
            http.ignoreContentType();
            http.connection().followRedirects(false);
            Connection.Response response = http.cookies(cookies).response();
            // Fix for redirection link; repeat request with the new shortcode
            if (response.statusCode() == 302) {
                Pattern redirectIdPattern = Pattern.compile("/p/(?<shortcode>[^?/]+)");
                Matcher m = redirectIdPattern.matcher(response.header("location"));
                return m.find() ? downloadItemDetailsJson(m.group("shortcode")) : null;
            }
            return new JSONObject(response.body());
        } catch (Exception e) {
            failedItems.add(shortcode);
            LOGGER.trace(format("No item %s found", shortcode), e);
        }
        return null;
    }

    private void addPrefixInfo(JSONObject itemDetailsJson) {
        JSONObject mediaItem = getJsonObjectByPath(itemDetailsJson, "graphql.shortcode_media");
        String shortcode = mediaItem.getString("shortcode");
        int subItemsCount = "GraphSidecar".equals(mediaItem.getString("__typename")) ?
                getJsonArrayByPath(mediaItem, "edge_sidecar_to_children.edges").length() : 1;
        for (int i = 0; i < subItemsCount; i++) {
            itemPrefixes.add(getTimestampPrefix(mediaItem) + shortcode + "_");
        }
    }

    private String getTimestampPrefix(JSONObject item) {
        Instant instant = Instant.ofEpochSecond(item.getLong("taken_at_timestamp"));
        return DateTimeFormatter.ofPattern("yyyy-MM-dd_HH-mm-ss_").format(ZonedDateTime.ofInstant(instant, ZoneOffset.UTC));
    }

    private Stream<? extends String> parseItemDetailsForUrls(JSONObject itemDetailsJson) {
        JSONObject mediaItem = getJsonObjectByPath(itemDetailsJson, "graphql.shortcode_media");
        // For some reason JSON video_url has lower quality than the HTML-tag one
        // HTML-tag url is requested here and marked with _extra_ prefix
        if ("GraphVideo".equals(mediaItem.getString("__typename"))) {
            String shortcode = mediaItem.getString("shortcode");
            String urlFromPage = getVideoUrlFromPage(shortcode);
            if (!urlFromPage.isEmpty()) {
                itemPrefixes.add(getTimestampPrefix(mediaItem) + shortcode + "_extra_");
                return Stream.of(mediaItem.getString("video_url"), urlFromPage);
            }
        }
        return parseRootForUrls(mediaItem);
    }

    // Uses recursion for GraphSidecar
    private Stream<? extends String> parseRootForUrls(JSONObject mediaItem) {
        String typeName = mediaItem.getString("__typename");
        switch (typeName) {
            case "GraphImage":
                return Stream.of(mediaItem.getString("display_url"));
            case "GraphVideo":
                return Stream.of(mediaItem.getString("video_url"));
            case "GraphSidecar":
                JSONArray sideCar = getJsonArrayByPath(mediaItem, "edge_sidecar_to_children.edges");
                return getStreamOfJsonArray(sideCar).map(object -> object.getJSONObject("node"))
                        .flatMap(this::parseRootForUrls);
        }
        return Stream.empty();
    }

    private String getVideoUrlFromPage(String videoID) {
        try {
            Document doc = Http.url("https://www.instagram.com/p/" + videoID).cookies(cookies).get();
            return doc.select("meta[property=og:video]").attr("content");
        } catch (Exception e) {
            LOGGER.warn("Unable to get page " + "https://www.instagram.com/p/" + videoID);
        }
        return "";
    }

    @Override
    protected void downloadURL(URL url, int index) {
        if (Utils.getConfigBoolean("instagram.download_images_only", false) && url.toString().contains(".mp4?")) {
            LOGGER.info("Skipped video url: " + url);
            return;
        }
        addURLToDownload(url, itemPrefixes.get(index - 1), "", null, cookies);
    }

    // Some JSON helper methods below
    /* ------------------------------------------------------------------------------------------------------- */
    private JSONObject getJsonObjectByPath(JSONObject object, String key) {
        Pattern arrayPattern = Pattern.compile("(?<arr>.*)\\[(?<idx>\\d+)]");
        JSONObject result = object;
        for (String s : key.split("[.]")) {
            Matcher m = arrayPattern.matcher(s);
            result = m.matches() ?
                    result.getJSONArray(m.group("arr")).getJSONObject(Integer.parseInt(m.group("idx"))) :
                    result.getJSONObject(s);
        }
        return result;
    }

    private <T> T getByPath(BiFunction<JSONObject, String, T> func, JSONObject object, String key) {
        int namePos = key.lastIndexOf('.');
        JSONObject parent = namePos < 0 ? object : getJsonObjectByPath(object, key.substring(0, namePos));
        return func.apply(parent, key.substring(namePos + 1));
    }

    private JSONArray getJsonArrayByPath(JSONObject object, String key) {
        return getByPath(JSONObject::getJSONArray, object, key);
    }

    private String getJsonStringByPath(JSONObject object, String key) {
        return getByPath(JSONObject::getString, object, key);
    }

    private Stream<JSONObject> getStreamOfJsonArray(JSONArray array) {
        return StreamSupport.stream(new JSONSpliterator(array), false);
    }

    private class JSONSpliterator extends Spliterators.AbstractSpliterator<JSONObject> {
        private JSONArray array;
        private int index = 0;

        JSONSpliterator(JSONArray array) {
            super(array.length(), SIZED | ORDERED);
            this.array = array;
        }

        @Override
        public boolean tryAdvance(Consumer<? super JSONObject> action) {
            if (index == array.length()) {
                return false;
            }
            action.accept(array.getJSONObject(index++));
            return true;
        }
    }
}
@@ -15,10 +15,15 @@ public class InstagramRipperTest extends RippersTest {
         Map<URL, String> testURLs = new HashMap<>();
         testURLs.put(new URL("http://instagram.com/Test_User"), "Test_User");
         testURLs.put(new URL("http://instagram.com/_test_user_"), "_test_user_");
-        testURLs.put(new URL("https://www.instagram.com/p/BZ4egP7njW5/?hl=en"), "BZ4egP7njW5");
-        testURLs.put(new URL("https://www.instagram.com/p/BZ4egP7njW5"), "BZ4egP7njW5");
-        testURLs.put(new URL("https://www.instagram.com/p/BaNPpaHn2zU/?taken-by=hilaryduff"), "hilaryduff_BaNPpaHn2zU");
-        testURLs.put(new URL("https://www.instagram.com/p/BaNPpaHn2zU/"), "BaNPpaHn2zU");
+        testURLs.put(new URL("http://instagram.com/_test_user_/?pinned"), "_test_user__pinned");
+        testURLs.put(new URL("http://instagram.com/stories/_test_user_/"), "_test_user__stories");
+        testURLs.put(new URL("http://instagram.com/_test_user_/tagged"), "_test_user__tagged");
+        testURLs.put(new URL("http://instagram.com/_test_user_/channel"), "_test_user__igtv");
+        testURLs.put(new URL("http://instagram.com/explore/tags/test_your_tag"), "tag_test_your_tag");
+        testURLs.put(new URL("https://www.instagram.com/p/BZ4egP7njW5/?hl=en"), "post_BZ4egP7njW5");
+        testURLs.put(new URL("https://www.instagram.com/p/BZ4egP7njW5"), "post_BZ4egP7njW5");
+        testURLs.put(new URL("https://www.instagram.com/p/BaNPpaHn2zU/?taken-by=hilaryduff"), "post_BaNPpaHn2zU");
+        testURLs.put(new URL("https://www.instagram.com/p/BaNPpaHn2zU/"), "post_BaNPpaHn2zU");
         for (URL url : testURLs.keySet()) {
             InstagramRipper ripper = new InstagramRipper(url);
             ripper.setup();
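For context, the renewed ripper pages through media with plain GraphQL GET requests built from the scraped query hash and a small JSON variables object. Below is a minimal standalone sketch of the URL shape it assembles in graphqlRequest(); the query hash, user id, and cursor values here are made-up placeholders, since at run time the hash is scraped from Instagram's preloaded JS (getQhash) and the id and end_cursor come from the previous page of results.

import org.json.JSONObject;

// Sketch only: mirrors the URL built in InstagramRipper.graphqlRequest().
// All concrete values below (hash, id, cursor) are hypothetical placeholders.
public class GraphqlQuerySketch {
    public static void main(String[] args) {
        String qHash = "9dcf6e1a98bc7f6e839c46f6c1f2b9e3";   // placeholder query hash
        JSONObject vars = new JSONObject()
                .put("id", "123456789")                      // placeholder user id ("tag_name" for hashtag rips)
                .put("first", 12)                            // page size used by the ripper
                .put("after", "QVFCc2FtcGxlQ3Vyc29y");       // placeholder end_cursor from page_info
        String url = String.format(
                "https://www.instagram.com/graphql/query/?query_hash=%s&variables=%s",
                qHash, vars.toString());
        System.out.println(url);
    }
}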