From 989ea710631e702e03a05d49b94563db30aed3ef Mon Sep 17 00:00:00 2001 From: Tushar Date: Mon, 22 Apr 2019 23:23:21 +0530 Subject: [PATCH] Fixed artstation ripper not ripping images. --- .../ripper/rippers/ArtStationRipper.java | 57 +++++++++++++++---- 1 file changed, 45 insertions(+), 12 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ArtStationRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ArtStationRipper.java index a81f8d84..6e1b4820 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/ArtStationRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ArtStationRipper.java @@ -7,14 +7,12 @@ import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; - -import com.rarchives.ripme.ripper.AbstractJSONRipper; -import com.rarchives.ripme.utils.Http; - import org.json.JSONObject; import org.jsoup.Connection; import org.jsoup.Connection.Method; import org.jsoup.Connection.Response; +import com.rarchives.ripme.ripper.AbstractJSONRipper; +import com.rarchives.ripme.utils.Http; public class ArtStationRipper extends AbstractJSONRipper { enum URL_TYPE { @@ -50,7 +48,8 @@ public class ArtStationRipper extends AbstractJSONRipper { if (albumURL.getType() == URL_TYPE.SINGLE_PROJECT) { // URL points to single project, use project title as GID try { - groupData = Http.url(albumURL.getLocation()).getJSON(); + // groupData = Http.url(albumURL.getLocation()).getJSON(); + groupData = getJson(albumURL.getLocation()); } catch (IOException e) { throw new MalformedURLException("Couldn't load JSON from " + albumURL.getLocation()); } @@ -61,7 +60,8 @@ public class ArtStationRipper extends AbstractJSONRipper { // URL points to user portfolio, use user's full name as GID String userInfoURL = "https://www.artstation.com/users/" + albumURL.getID() + "/quick.json"; try { - groupData = Http.url(userInfoURL).getJSON(); + // groupData = Http.url(userInfoURL).getJSON(); + groupData 
= getJson(userInfoURL); } catch (IOException e) { throw new MalformedURLException("Couldn't load JSON from " + userInfoURL); } @@ -77,19 +77,22 @@ public class ArtStationRipper extends AbstractJSONRipper { protected JSONObject getFirstPage() throws IOException { if (albumURL.getType() == URL_TYPE.SINGLE_PROJECT) { // URL points to JSON of a single project, just return it - return Http.url(albumURL.getLocation()).getJSON(); + // return Http.url(albumURL.getLocation()).getJSON(); + return getJson(albumURL.getLocation()); } if (albumURL.getType() == URL_TYPE.USER_PORTFOLIO) { // URL points to JSON of a list of projects, load it to parse individual // projects - JSONObject albumContent = Http.url(albumURL.getLocation()).getJSON(); + // JSONObject albumContent = Http.url(albumURL.getLocation()).getJSON(); + JSONObject albumContent = getJson(albumURL.getLocation()); if (albumContent.getInt("total_count") > 0) { // Get JSON of the first project and return it JSONObject projectInfo = albumContent.getJSONArray("data").getJSONObject(0); ParsedURL projectURL = parseURL(new URL(projectInfo.getString("permalink"))); - return Http.url(projectURL.getLocation()).getJSON(); + // return Http.url(projectURL.getLocation()).getJSON(); + return getJson(projectURL.getLocation()); } } @@ -115,14 +118,16 @@ public class ArtStationRipper extends AbstractJSONRipper { } Integer currentProject = ((projectPageNumber - 1) * 50) + (projectIndex + 1); - JSONObject albumContent = Http.url(albumURL.getLocation() + "?page=" + projectPageNumber).getJSON(); + // JSONObject albumContent = Http.url(albumURL.getLocation() + "?page=" + projectPageNumber).getJSON(); + JSONObject albumContent = getJson(albumURL.getLocation() + "?page=" + projectPageNumber); if (albumContent.getInt("total_count") > currentProject) { // Get JSON of the next project and return it JSONObject projectInfo = albumContent.getJSONArray("data").getJSONObject(projectIndex); ParsedURL projectURL = parseURL(new 
URL(projectInfo.getString("permalink"))); projectIndex++; - return Http.url(projectURL.getLocation()).getJSON(); + // return Http.url(projectURL.getLocation()).getJSON(); + return getJson(projectURL.getLocation()); } throw new IOException("No more projects"); @@ -249,9 +254,14 @@ public class ArtStationRipper extends AbstractJSONRipper { // htmlSource = Http.url(url).get().html(); Connection con = Http.url(url).method(Method.GET).connection(); con.ignoreHttpErrors(true); + con.userAgent("Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:67.0) Gecko/20100101 Firefox/67.0"); + con.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"); + con.header("Accept-Language", "en-US,en;q=0.5"); + con.header("Accept-Encoding", "gzip, deflate, br"); + con.header("Upgrade-Insecure-Requests", "1"); Response res = con.execute(); int status = res.statusCode(); - + if (status / 100 == 2) { htmlSource = res.parse().html(); } else if (status == 403 && url.toString().contains("artwork/")) { @@ -291,5 +301,28 @@ public class ArtStationRipper extends AbstractJSONRipper { parsedURL = new ParsedURL(URL_TYPE.UNKNOWN, null, null); return parsedURL; } + + // Use this method instead of direct call to Http.url(url).getJson() to avoid cloudflare 403 page. 
+    /**
+     * Fetches {@code url} with browser-like request headers and parses the response body as JSON.
+     * Used in place of {@code Http.url(url).getJSON()}, which was being answered with a 403 page.
+     *
+     * @param url location of the JSON document to download
+     * @return the parsed response body
+     * @throws IOException if the request fails, the status code is not 2xx, or the body is not
+     *                     valid JSON
+     */
+    private JSONObject getJson(URL url) throws IOException {
+        Connection con = Http.url(url).method(Method.GET).connection();
+        // Non-2xx statuses are checked explicitly below so the status code ends up in the message.
+        con.ignoreHttpErrors(true);
+        // The response is JSON rather than HTML/XML, so jsoup's content-type check must be relaxed.
+        con.ignoreContentType(true);
+        con.userAgent("Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:67.0) Gecko/20100101 Firefox/67.0");
+        con.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
+        con.header("Accept-Language", "en-US,en;q=0.5");
+        // Advertise gzip only: jsoup cannot decode Brotli ("br"), so offering it lets the server
+        // reply with a body that would reach the JSON parser still compressed.
+        con.header("Accept-Encoding", "gzip");
+        con.header("Upgrade-Insecure-Requests", "1");
+        Response res = con.execute();
+        int status = res.statusCode();
+        if (status / 100 != 2) {
+            throw new IOException("Error fetching json. Status code:" + status);
+        }
+        try {
+            return new JSONObject(res.body());
+        } catch (org.json.JSONException e) {
+            // A 2xx reply that is not JSON (e.g. an HTML interstitial) is reported as an I/O
+            // failure so callers' existing IOException handling covers it.
+            throw new IOException("Error parsing json from " + url, e);
+        }
+    }
+
+    /**
+     * Convenience overload of {@link #getJson(URL)} that accepts the location as a string.
+     *
+     * @param url location of the JSON document to download
+     * @return the parsed response body
+     * @throws IOException if {@code url} is malformed or the download/parse fails
+     */
+    private JSONObject getJson(String url) throws IOException {
+        return getJson(new URL(url));
+    }
 }