From 227161bb31e2f4e50aa398a72291fbbdd11a4068 Mon Sep 17 00:00:00 2001
From: Philipp Erhardt
Date: Sun, 10 Nov 2019 11:22:56 +0100
Subject: [PATCH] Add cookie support for all rippers

Cookies are read from config entries named "cookies.<host>". An entry for a
domain also applies to its subdomains (reddit.com covers www.reddit.com).

---
 .../java/com/rarchives/ripme/utils/Http.java  | 43 +++++++++++++++++++
 .../com/rarchives/ripme/utils/RipUtils.java   |  9 +++-
 2 files changed, 50 insertions(+), 2 deletions(-)

diff --git a/src/main/java/com/rarchives/ripme/utils/Http.java b/src/main/java/com/rarchives/ripme/utils/Http.java
index 885a194d..1776463a 100644
--- a/src/main/java/com/rarchives/ripme/utils/Http.java
+++ b/src/main/java/com/rarchives/ripme/utils/Http.java
@@ -1,10 +1,12 @@
 package com.rarchives.ripme.utils;
 
 import java.io.IOException;
+import java.net.MalformedURLException;
 import java.net.URL;
 import java.util.HashMap;
 import java.util.Map;
 
+import org.apache.commons.lang.ArrayUtils;
 import org.apache.log4j.Logger;
 import org.json.JSONObject;
 import org.jsoup.Connection;
@@ -53,6 +55,47 @@ public class Http {
         connection.method(Method.GET);
         connection.timeout(TIMEOUT);
         connection.maxBodySize(0);
+
+        // Extract cookies from config entry:
+        // Example config entry:
+        // cookies.reddit.com = reddit_session=<value>; other_cookie=<other_value>
+        connection.cookies(cookiesForURL(this.url));
+    }
+
+    /**
+     * Looks up the cookies configured for the host of the given URL.
+     *
+     * Config keys have the form {@code cookies.<host>}. The full host is tried
+     * first, then each parent domain (www.reddit.com, then reddit.com), so an
+     * entry for a domain also applies to its subdomains. A bare top-level label
+     * such as "com" is never looked up.
+     *
+     * @param u URL whose host selects the config entry
+     * @return the parsed cookies; empty if none are configured or u is malformed
+     */
+    private Map<String, String> cookiesForURL(String u) {
+        Map<String, String> cookiesParsed = new HashMap<>();
+
+        try {
+            String[] parts = new URL(u).getHost().split("\\.");
+
+            while (parts.length > 1) {
+                // Try to get cookies for this host from config
+                String key = "cookies." + String.join(".", parts);
+                String cookieStr = Utils.getConfigString(key, "").trim();
+                // Compare by content: != on Strings only compares references.
+                if (!cookieStr.isEmpty()) {
+                    cookiesParsed = RipUtils.getCookiesFromString(cookieStr);
+                    break;
+                }
+                // Nothing configured here: drop the leftmost label and retry.
+                parts = (String[]) ArrayUtils.remove(parts, 0);
+            }
+        } catch (MalformedURLException e) {
+            logger.warn("Parsing url while getting cookies: " + u, e);
+        }
+
+        return cookiesParsed;
     }
 
     // Setters
diff --git a/src/main/java/com/rarchives/ripme/utils/RipUtils.java b/src/main/java/com/rarchives/ripme/utils/RipUtils.java
index 5dea166b..03a480cf 100644
--- a/src/main/java/com/rarchives/ripme/utils/RipUtils.java
+++ b/src/main/java/com/rarchives/ripme/utils/RipUtils.java
@@ -301,7 +301,12 @@ public class RipUtils {
         Map<String,String> cookies = new HashMap<>();
         for (String pair : line.split(";")) {
-            String[] kv = pair.split("=");
-            cookies.put(kv[0], kv[1]);
+            // Limit of 2 keeps '=' inside values and yields "" for "name=".
+            String[] kv = pair.split("=", 2);
+            String name = kv[0].trim();
+            if (kv.length < 2 || name.isEmpty()) {
+                continue; // skip blank or malformed pairs instead of throwing
+            }
+            cookies.put(name, kv[1]);
         }
         return cookies;
     }