diff --git a/src/main/java/com/rarchives/ripme/utils/Http.java b/src/main/java/com/rarchives/ripme/utils/Http.java
index 885a194d..1b85005c 100644
--- a/src/main/java/com/rarchives/ripme/utils/Http.java
+++ b/src/main/java/com/rarchives/ripme/utils/Http.java
@@ -1,16 +1,21 @@
 package com.rarchives.ripme.utils;
 
 import java.io.IOException;
+import java.net.MalformedURLException;
 import java.net.URL;
 import java.util.HashMap;
 import java.util.Map;
 
+import org.apache.commons.lang.ArrayUtils;
+import org.apache.commons.lang.StringUtils;
 import org.apache.log4j.Logger;
 import org.json.JSONObject;
 import org.jsoup.Connection;
 import org.jsoup.Connection.Method;
 import org.jsoup.Connection.Response;
+import org.jsoup.helper.StringUtil;
 import org.jsoup.Jsoup;
+import org.jsoup.HttpStatusException;
 import org.jsoup.nodes.Document;
 
 import com.rarchives.ripme.ripper.AbstractRipper;
@@ -22,8 +27,8 @@ import com.rarchives.ripme.ripper.AbstractRipper;
  */
 public class Http {
 
-    private static final int TIMEOUT = Utils.getConfigInteger("page.timeout", 5 * 1000);
-    private static final Logger logger = Logger.getLogger(Http.class);
+    private static final int TIMEOUT = Utils.getConfigInteger("page.timeout", 5 * 1000);
+    private static final Logger logger = Logger.getLogger(Http.class);
 
     private int retries;
     private String url;
@@ -53,6 +58,50 @@ public class Http {
         connection.method(Method.GET);
         connection.timeout(TIMEOUT);
         connection.maxBodySize(0);
+
+        // Extract cookies from config entry:
+        // Example config entry:
+        // cookies.reddit.com = reddit_session=<value>; other_cookie=<value>
+        connection.cookies(cookiesForURL(this.url));
+    }
+
+    private Map<String, String> cookiesForURL(String u) {
+        Map<String, String> cookiesParsed = new HashMap<>();
+
+        String cookieDomain = "";
+        try {
+            URL parsed = new URL(u);
+            String cookieStr = "";
+
+            String[] parts = parsed.getHost().split("\\.");
+
+            // if url is www.reddit.com, we should also use cookies from reddit.com;
+            // this rule is applied for all subdomains (for all rippers); e.g. also
+            // old.reddit.com, new.reddit.com
+            while (parts.length > 1) {
+                String domain = String.join(".", parts);
+                // Try to get cookies for this host from config
+                cookieStr = Utils.getConfigString("cookies." + domain, "");
+                if (!cookieStr.equals("")) {
+                    cookieDomain = domain;
+                    // we found something, start parsing
+                    break;
+                }
+                parts = (String[]) ArrayUtils.remove(parts, 0);
+            }
+
+            if (!cookieStr.equals("")) {
+                cookiesParsed = RipUtils.getCookiesFromString(cookieStr.trim());
+            }
+        } catch (MalformedURLException e) {
+            logger.warn("Parsing url " + u + " while getting cookies", e);
+        }
+
+        if (cookiesParsed.size() > 0) {
+            logger.info("Cookies for " + cookieDomain + " have been added to this request");
+        }
+
+        return cookiesParsed;
     }
 
     // Setters
@@ -130,6 +179,20 @@ public class Http {
             response = connection.execute();
             return response;
         } catch (IOException e) {
+            // Warn users about possibly fixable permission error
+            if (e instanceof org.jsoup.HttpStatusException) {
+                HttpStatusException ex = (HttpStatusException) e;
+
+                // These status codes might indicate missing cookies
+                // 401 Unauthorized
+                // 403 Forbidden
+                int status = ex.getStatusCode();
+                if (status == 401 || status == 403) {
+                    throw new IOException("Failed to load " + url + ": Status Code " + Integer.toString(status)
+                            + ". You might be able to circumvent this error by setting cookies for this domain", e);
+                }
+            }
+
             logger.warn("Error while loading " + url, e);
             lastException = e;
         }
diff --git a/src/main/java/com/rarchives/ripme/utils/RipUtils.java b/src/main/java/com/rarchives/ripme/utils/RipUtils.java
index 5dea166b..03a480cf 100644
--- a/src/main/java/com/rarchives/ripme/utils/RipUtils.java
+++ b/src/main/java/com/rarchives/ripme/utils/RipUtils.java
@@ -301,7 +301,7 @@ public class RipUtils {
         Map<String, String> cookies = new HashMap<>();
         for (String pair : line.split(";")) {
             String[] kv = pair.split("=");
-            cookies.put(kv[0], kv[1]);
+            cookies.put(kv[0].trim(), kv[1]);
         }
         return cookies;
     }
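
For reference, a minimal standalone sketch of the domain-fallback lookup that
cookiesForURL() performs. The host and config key below are illustrative, not
taken from the patch; the real code reads values via Utils.getConfigString and
shortens the host with ArrayUtils.remove where this sketch uses
Arrays.copyOfRange:

    import java.util.Arrays;

    public class CookieLookupSketch {
        public static void main(String[] args) {
            String host = "old.reddit.com";
            String[] parts = host.split("\\.");
            // Walks old.reddit.com -> reddit.com, stopping before the bare TLD,
            // so a single "cookies.reddit.com" entry also covers every subdomain.
            // The real code stops at the first key with a non-empty config value.
            while (parts.length > 1) {
                System.out.println("checking config key: cookies." + String.join(".", parts));
                parts = Arrays.copyOfRange(parts, 1, parts.length);
            }
        }
    }

Run against old.reddit.com, this prints cookies.old.reddit.com followed by
cookies.reddit.com, matching the lookup order the patch implements.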
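
The RipUtils change trims cookie names because String.split(";") keeps the
space that follows each semicolon, so every name after the first would
otherwise carry a leading blank. A quick illustration (cookie values made up):

    public class CookieTrimSketch {
        public static void main(String[] args) {
            String line = "reddit_session=abc; other_cookie=xyz";
            for (String pair : line.split(";")) {
                String[] kv = pair.split("=");
                // Prints [reddit_session]=[abc] and [other_cookie]=[xyz];
                // without trim() the second name would be " other_cookie".
                System.out.println("[" + kv[0].trim() + "]=[" + kv[1] + "]");
            }
        }
    }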