1
0
mirror of https://github.com/RipMeApp/ripme.git synced 2025-01-18 21:17:59 +01:00

ChanRipper: Add new image host domain for 4chan.

This commit is contained in:
metaprime 2016-12-19 19:49:36 -08:00
parent 2fee385589
commit 034f7ee181

View File

@ -18,24 +18,24 @@ import com.rarchives.ripme.ripper.rippers.ripperhelpers.ChanSite;
import com.rarchives.ripme.utils.Http; import com.rarchives.ripme.utils.Http;
public class ChanRipper extends AbstractHTMLRipper { public class ChanRipper extends AbstractHTMLRipper {
public static List<ChanSite> explicit_domains = Arrays.asList( public static List<ChanSite> explicit_domains = Arrays.asList(
new ChanSite(Arrays.asList("boards.4chan.org"), Arrays.asList("4cdn.org")), new ChanSite(Arrays.asList("boards.4chan.org"), Arrays.asList("4cdn.org", "is.4chan.org")),
new ChanSite(Arrays.asList("archive.moe"), Arrays.asList("data.archive.moe")), new ChanSite(Arrays.asList("archive.moe"), Arrays.asList("data.archive.moe")),
new ChanSite(Arrays.asList("4archive.org"), Arrays.asList("imgur.com")), new ChanSite(Arrays.asList("4archive.org"), Arrays.asList("imgur.com")),
new ChanSite(Arrays.asList("archive.4plebs.org"), Arrays.asList("img.4plebs.org")), new ChanSite(Arrays.asList("archive.4plebs.org"), Arrays.asList("img.4plebs.org")),
new ChanSite(Arrays.asList("fgts.jp"), Arrays.asList("dat.fgtsi.org")) new ChanSite(Arrays.asList("fgts.jp"), Arrays.asList("dat.fgtsi.org"))
); );
public static List<String> url_piece_blacklist = Arrays.asList( public static List<String> url_piece_blacklist = Arrays.asList(
"=http", "=http",
"http://imgops.com/", "http://imgops.com/",
"iqdb.org", "iqdb.org",
"saucenao.com" "saucenao.com"
); );
public ChanSite chanSite; public ChanSite chanSite;
public Boolean generalChanSite = true; public Boolean generalChanSite = true;
public ChanRipper(URL url) throws IOException { public ChanRipper(URL url) throws IOException {
super(url); super(url);
for (ChanSite _chanSite : explicit_domains) { for (ChanSite _chanSite : explicit_domains) {
@ -133,7 +133,7 @@ public class ChanRipper extends AbstractHTMLRipper {
if (url.contains(blacklist_item)) { if (url.contains(blacklist_item)) {
logger.debug("Skipping link that contains '"+blacklist_item+"': " + url); logger.debug("Skipping link that contains '"+blacklist_item+"': " + url);
return true; return true;
} }
} }
return false; return false;
} }
@ -142,7 +142,7 @@ public class ChanRipper extends AbstractHTMLRipper {
List<String> imageURLs = new ArrayList<String>(); List<String> imageURLs = new ArrayList<String>();
Pattern p; Matcher m; Pattern p; Matcher m;
for (Element link : page.select("a")) { for (Element link : page.select("a")) {
if (!link.hasAttr("href")) { if (!link.hasAttr("href")) {
continue; continue;
} }
String href = link.attr("href").trim(); String href = link.attr("href").trim();
@ -154,10 +154,10 @@ public class ChanRipper extends AbstractHTMLRipper {
Boolean self_hosted = false; Boolean self_hosted = false;
if (!generalChanSite) { if (!generalChanSite) {
for (String cdnDomain : chanSite.cdnDomains) { for (String cdnDomain : chanSite.cdnDomains) {
if (href.contains(cdnDomain)){ if (href.contains(cdnDomain)){
self_hosted = true; self_hosted = true;
} }
} }
} }
if (self_hosted || generalChanSite){ if (self_hosted || generalChanSite){
@ -182,7 +182,7 @@ public class ChanRipper extends AbstractHTMLRipper {
} }
} else { } else {
//TODO also grab imgur/flickr albums (And all other supported rippers) Maybe add a setting? //TODO also grab imgur/flickr albums (And all other supported rippers) Maybe add a setting?
} }
if (isStopped()) { if (isStopped()) {
break; break;
@ -194,5 +194,5 @@ public class ChanRipper extends AbstractHTMLRipper {
@Override @Override
public void downloadURL(URL url, int index) { public void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index), "", this.url.toString(), null); addURLToDownload(url, getPrefix(index), "", this.url.toString(), null);
} }
} }