1
0
mirror of https://github.com/RipMeApp/ripme.git synced 2025-08-11 00:14:12 +02:00

Merge pull request #1289 from Tush-r/artstation

Fixed artstation ripper not ripping images.
This commit is contained in:
cyian-1756
2019-05-04 17:30:03 -05:00
committed by GitHub

View File

@@ -7,14 +7,12 @@ import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import com.rarchives.ripme.ripper.AbstractJSONRipper;
import com.rarchives.ripme.utils.Http;
import org.json.JSONObject; import org.json.JSONObject;
import org.jsoup.Connection; import org.jsoup.Connection;
import org.jsoup.Connection.Method; import org.jsoup.Connection.Method;
import org.jsoup.Connection.Response; import org.jsoup.Connection.Response;
import com.rarchives.ripme.ripper.AbstractJSONRipper;
import com.rarchives.ripme.utils.Http;
public class ArtStationRipper extends AbstractJSONRipper { public class ArtStationRipper extends AbstractJSONRipper {
enum URL_TYPE { enum URL_TYPE {
@@ -50,7 +48,8 @@ public class ArtStationRipper extends AbstractJSONRipper {
if (albumURL.getType() == URL_TYPE.SINGLE_PROJECT) { if (albumURL.getType() == URL_TYPE.SINGLE_PROJECT) {
// URL points to single project, use project title as GID // URL points to single project, use project title as GID
try { try {
groupData = Http.url(albumURL.getLocation()).getJSON(); // groupData = Http.url(albumURL.getLocation()).getJSON();
groupData = getJson(albumURL.getLocation());
} catch (IOException e) { } catch (IOException e) {
throw new MalformedURLException("Couldn't load JSON from " + albumURL.getLocation()); throw new MalformedURLException("Couldn't load JSON from " + albumURL.getLocation());
} }
@@ -61,7 +60,8 @@ public class ArtStationRipper extends AbstractJSONRipper {
// URL points to user portfolio, use user's full name as GID // URL points to user portfolio, use user's full name as GID
String userInfoURL = "https://www.artstation.com/users/" + albumURL.getID() + "/quick.json"; String userInfoURL = "https://www.artstation.com/users/" + albumURL.getID() + "/quick.json";
try { try {
groupData = Http.url(userInfoURL).getJSON(); // groupData = Http.url(userInfoURL).getJSON();
groupData = getJson(userInfoURL);
} catch (IOException e) { } catch (IOException e) {
throw new MalformedURLException("Couldn't load JSON from " + userInfoURL); throw new MalformedURLException("Couldn't load JSON from " + userInfoURL);
} }
@@ -77,19 +77,22 @@ public class ArtStationRipper extends AbstractJSONRipper {
protected JSONObject getFirstPage() throws IOException { protected JSONObject getFirstPage() throws IOException {
if (albumURL.getType() == URL_TYPE.SINGLE_PROJECT) { if (albumURL.getType() == URL_TYPE.SINGLE_PROJECT) {
// URL points to JSON of a single project, just return it // URL points to JSON of a single project, just return it
return Http.url(albumURL.getLocation()).getJSON(); // return Http.url(albumURL.getLocation()).getJSON();
return getJson(albumURL.getLocation());
} }
if (albumURL.getType() == URL_TYPE.USER_PORTFOLIO) { if (albumURL.getType() == URL_TYPE.USER_PORTFOLIO) {
// URL points to JSON of a list of projects, load it to parse individual // URL points to JSON of a list of projects, load it to parse individual
// projects // projects
JSONObject albumContent = Http.url(albumURL.getLocation()).getJSON(); // JSONObject albumContent = Http.url(albumURL.getLocation()).getJSON();
JSONObject albumContent = getJson(albumURL.getLocation());
if (albumContent.getInt("total_count") > 0) { if (albumContent.getInt("total_count") > 0) {
// Get JSON of the first project and return it // Get JSON of the first project and return it
JSONObject projectInfo = albumContent.getJSONArray("data").getJSONObject(0); JSONObject projectInfo = albumContent.getJSONArray("data").getJSONObject(0);
ParsedURL projectURL = parseURL(new URL(projectInfo.getString("permalink"))); ParsedURL projectURL = parseURL(new URL(projectInfo.getString("permalink")));
return Http.url(projectURL.getLocation()).getJSON(); // return Http.url(projectURL.getLocation()).getJSON();
return getJson(projectURL.getLocation());
} }
} }
@@ -115,14 +118,16 @@ public class ArtStationRipper extends AbstractJSONRipper {
} }
Integer currentProject = ((projectPageNumber - 1) * 50) + (projectIndex + 1); Integer currentProject = ((projectPageNumber - 1) * 50) + (projectIndex + 1);
JSONObject albumContent = Http.url(albumURL.getLocation() + "?page=" + projectPageNumber).getJSON(); // JSONObject albumContent = Http.url(albumURL.getLocation() + "?page=" + projectPageNumber).getJSON();
JSONObject albumContent = getJson(albumURL.getLocation() + "?page=" + projectPageNumber);
if (albumContent.getInt("total_count") > currentProject) { if (albumContent.getInt("total_count") > currentProject) {
// Get JSON of the next project and return it // Get JSON of the next project and return it
JSONObject projectInfo = albumContent.getJSONArray("data").getJSONObject(projectIndex); JSONObject projectInfo = albumContent.getJSONArray("data").getJSONObject(projectIndex);
ParsedURL projectURL = parseURL(new URL(projectInfo.getString("permalink"))); ParsedURL projectURL = parseURL(new URL(projectInfo.getString("permalink")));
projectIndex++; projectIndex++;
return Http.url(projectURL.getLocation()).getJSON(); // return Http.url(projectURL.getLocation()).getJSON();
return getJson(projectURL.getLocation());
} }
throw new IOException("No more projects"); throw new IOException("No more projects");
@@ -249,9 +254,14 @@ public class ArtStationRipper extends AbstractJSONRipper {
// htmlSource = Http.url(url).get().html(); // htmlSource = Http.url(url).get().html();
Connection con = Http.url(url).method(Method.GET).connection(); Connection con = Http.url(url).method(Method.GET).connection();
con.ignoreHttpErrors(true); con.ignoreHttpErrors(true);
con.userAgent("Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:67.0) Gecko/20100101 Firefox/67.0");
con.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
con.header("Accept-Language", "en-US,en;q=0.5");
con.header("Accept-Encoding", "gzip, deflate, br");
con.header("Upgrade-Insecure-Requests", "1");
Response res = con.execute(); Response res = con.execute();
int status = res.statusCode(); int status = res.statusCode();
if (status / 100 == 2) { if (status / 100 == 2) {
htmlSource = res.parse().html(); htmlSource = res.parse().html();
} else if (status == 403 && url.toString().contains("artwork/")) { } else if (status == 403 && url.toString().contains("artwork/")) {
@@ -291,5 +301,28 @@ public class ArtStationRipper extends AbstractJSONRipper {
parsedURL = new ParsedURL(URL_TYPE.UNKNOWN, null, null); parsedURL = new ParsedURL(URL_TYPE.UNKNOWN, null, null);
return parsedURL; return parsedURL;
} }
// Use this method instead of calling Http.url(url).getJSON() directly, to avoid Cloudflare's 403 page.
/**
 * Fetches {@code url} with browser-like request headers and parses the
 * response body as a JSON object.
 *
 * <p>The request sets a Firefox user agent and matching headers; per the
 * note accompanying this helper, that is what avoids the Cloudflare 403 page
 * that a plain {@code Http.url(url).getJSON()} call runs into.
 *
 * @param url the JSON endpoint to request
 * @return the response body parsed as a {@link JSONObject}
 * @throws IOException if the request fails or the response status is not 2xx
 */
private JSONObject getJson(URL url) throws IOException {
    Connection con = Http.url(url).method(Method.GET).connection();
    // Inspect the status code ourselves instead of letting jsoup throw on non-2xx.
    con.ignoreHttpErrors(true);
    // The endpoint serves JSON, which jsoup would otherwise reject as an unsupported content type.
    con.ignoreContentType(true);
    con.userAgent("Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:67.0) Gecko/20100101 Firefox/67.0");
    con.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
    con.header("Accept-Language", "en-US,en;q=0.5");
    // Only advertise gzip: jsoup cannot decode Brotli ("br"), so offering it lets the
    // server reply with a compressed body that res.body() would return as garbage.
    con.header("Accept-Encoding", "gzip");
    con.header("Upgrade-Insecure-Requests", "1");
    Response res = con.execute();
    int status = res.statusCode();
    if (status / 100 == 2) {
        String jsonString = res.body();
        return new JSONObject(jsonString);
    }
    throw new IOException("Error fetching json. Status code:" + status);
}
/**
 * String overload of {@link #getJson(URL)}: converts {@code url} to a
 * {@link URL} and delegates to it.
 *
 * @param url the address of the JSON endpoint
 * @return the parsed JSON object returned by {@link #getJson(URL)}
 * @throws IOException if {@code url} is not a valid URL or the delegate throws
 */
private JSONObject getJson(String url) throws IOException {
    final URL target = new URL(url);
    return getJson(target);
}
} }