
Merge pull request #1483 from xarantolus/master

Add cookie support for all rippers
Committed by cyian-1756 on 2019-12-03 06:39:56 -05:00 (merged via GitHub)
2 changed files with 66 additions and 3 deletions
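For context, the end-user effect of this change is that any ripper going through the shared Http helper picks up cookies from the config automatically. A minimal usage sketch, assuming the usual Http.url(...).get() call pattern used by ripme rippers and a config entry like the one shown in the diff below:

import java.io.IOException;

import org.jsoup.nodes.Document;

import com.rarchives.ripme.utils.Http;

public class CookieUsageSketch {
    public static void main(String[] args) throws IOException {
        // Assumes the config contains a line such as:
        //   cookies.reddit.com = reddit_session=<value>; other_cookie=<value>
        // With this change, the request below would carry those cookies,
        // including on subdomains like www.reddit.com or old.reddit.com.
        Document page = Http.url("https://www.reddit.com/r/pics").get();
        System.out.println(page.title());
    }
}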

File: com/rarchives/ripme/utils/Http.java

@@ -1,16 +1,21 @@
package com.rarchives.ripme.utils;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.json.JSONObject;
import org.jsoup.Connection;
import org.jsoup.Connection.Method;
import org.jsoup.Connection.Response;
import org.jsoup.helper.StringUtil;
import org.jsoup.Jsoup;
import org.jsoup.HttpStatusException;
import org.jsoup.nodes.Document;
import com.rarchives.ripme.ripper.AbstractRipper;
@@ -22,8 +27,8 @@ import com.rarchives.ripme.ripper.AbstractRipper;
*/
public class Http {
    private static final int TIMEOUT = Utils.getConfigInteger("page.timeout", 5 * 1000);
    private static final Logger logger = Logger.getLogger(Http.class);

    private int retries;
    private String url;
@@ -53,6 +58,50 @@ public class Http {
        connection.method(Method.GET);
        connection.timeout(TIMEOUT);
        connection.maxBodySize(0);

        // Extract cookies from config entry:
        // Example config entry:
        //   cookies.reddit.com = reddit_session=<value>; other_cookie=<value>
        connection.cookies(cookiesForURL(this.url));
    }
    private Map<String, String> cookiesForURL(String u) {
        Map<String, String> cookiesParsed = new HashMap<>();

        String cookieDomain = "";
        try {
            URL parsed = new URL(u);
            String cookieStr = "";

            String[] parts = parsed.getHost().split("\\.");
            // if url is www.reddit.com, we should also use cookies from reddit.com;
            // this rule is applied for all subdomains (for all rippers); e.g. also
            // old.reddit.com, new.reddit.com
            while (parts.length > 1) {
                String domain = String.join(".", parts);
                // Try to get cookies for this host from config
                cookieStr = Utils.getConfigString("cookies." + domain, "");
                if (!cookieStr.equals("")) {
                    cookieDomain = domain;
                    // we found something, start parsing
                    break;
                }
                parts = (String[]) ArrayUtils.remove(parts, 0);
            }

            if (!cookieStr.equals("")) {
                cookiesParsed = RipUtils.getCookiesFromString(cookieStr.trim());
            }
        } catch (MalformedURLException e) {
            logger.warn("Parsing url " + u + " while getting cookies", e);
        }

        if (cookiesParsed.size() > 0) {
            logger.info("Cookies for " + cookieDomain + " have been added to this request");
        }

        return cookiesParsed;
    }

    // Setters
@@ -130,6 +179,20 @@ public class Http {
            response = connection.execute();
            return response;
        } catch (IOException e) {
            // Warn users about possibly fixable permission error
            if (e instanceof org.jsoup.HttpStatusException) {
                HttpStatusException ex = (HttpStatusException) e;
                // These status codes might indicate missing cookies:
                //   401 Unauthorized
                //   403 Forbidden
                int status = ex.getStatusCode();
                if (status == 401 || status == 403) {
                    throw new IOException("Failed to load " + url + ": Status Code " + Integer.toString(status) + ". You might be able to circumvent this error by setting cookies for this domain", e);
                }
            }
            logger.warn("Error while loading " + url, e);
            lastException = e;
        }
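To make the subdomain fallback in cookiesForURL concrete, here is a small standalone sketch of the same host walk. The in-memory CONFIG map and the lookupCookies helper are illustrative stand-ins for Utils.getConfigString, not part of the change:

import java.net.URL;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

public class CookieLookupSketch {
    // Stand-in for the real config backing store (hypothetical data).
    private static final Map<String, String> CONFIG = new HashMap<>();
    static {
        CONFIG.put("cookies.reddit.com", "reddit_session=abc; other_cookie=xyz");
    }

    // Walks www.reddit.com -> reddit.com and returns the first config hit.
    static String lookupCookies(String urlString) throws Exception {
        String[] parts = new URL(urlString).getHost().split("\\.");
        while (parts.length > 1) {
            String domain = String.join(".", parts);
            String cookieStr = CONFIG.getOrDefault("cookies." + domain, "");
            if (!cookieStr.isEmpty()) {
                return cookieStr;
            }
            parts = Arrays.copyOfRange(parts, 1, parts.length); // drop the leftmost label
        }
        return "";
    }

    public static void main(String[] args) throws Exception {
        // Both resolve to the cookies.reddit.com entry.
        System.out.println(lookupCookies("https://www.reddit.com/r/pics"));
        System.out.println(lookupCookies("https://old.reddit.com/r/pics"));
    }
}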

File: com/rarchives/ripme/utils/RipUtils.java

@@ -301,7 +301,7 @@ public class RipUtils {
        Map<String,String> cookies = new HashMap<>();
        for (String pair : line.split(";")) {
            String[] kv = pair.split("=");
            cookies.put(kv[0].trim(), kv[1]);   // previously: cookies.put(kv[0], kv[1]);
        }
        return cookies;
    }
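The single-character change above matters because line.split(";") keeps the space that follows each semicolon, so every cookie key after the first would otherwise carry a leading space and never match on the server. A minimal standalone sketch of the same parse (mirroring getCookiesFromString rather than calling it):

import java.util.HashMap;
import java.util.Map;

public class CookieParseSketch {
    // Mirrors the parsing in RipUtils.getCookiesFromString: "k1=v1; k2=v2" -> {k1=v1, k2=v2}
    static Map<String, String> parse(String line) {
        Map<String, String> cookies = new HashMap<>();
        for (String pair : line.split(";")) {
            String[] kv = pair.split("=");
            // Without trim(), the second key would be " other_cookie" (leading space).
            cookies.put(kv[0].trim(), kv[1]);
        }
        return cookies;
    }

    public static void main(String[] args) {
        System.out.println(parse("reddit_session=abc; other_cookie=xyz"));
        // prints something like {other_cookie=xyz, reddit_session=abc}
    }
}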