1
0
mirror of https://github.com/RipMeApp/ripme.git synced 2025-09-02 18:33:13 +02:00

Make NewsfilterRipper inherit from AbstractHTMLRipper

Also add a basic unit test for this ripper (it does not cover getGID() and getAlbumTitle()).
This commit is contained in:
Peter Szakacs
2018-10-27 22:13:01 +02:00
parent 4bf94d7f86
commit 4881025915
2 changed files with 44 additions and 38 deletions

View File

@@ -3,9 +3,13 @@ package com.rarchives.ripme.ripper.rippers;
import java.io.IOException; import java.io.IOException;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URL; import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
import org.jsoup.Connection; import org.jsoup.Connection;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
@@ -14,21 +18,15 @@ import org.jsoup.select.Elements;
import com.rarchives.ripme.ripper.AlbumRipper; import com.rarchives.ripme.ripper.AlbumRipper;
public class NewsfilterRipper extends AlbumRipper { public class NewsfilterRipper extends AbstractHTMLRipper {
private static final String HOST = "newsfilter"; private static final String HOST = "newsfilter";
private static final String DOMAIN = "newsfilter.org";
public NewsfilterRipper(URL url) throws IOException { public NewsfilterRipper(URL url) throws IOException {
super(url); super(url);
} }
/**
 * Reports whether the given URL looks like a newsfilter.org gallery URL.
 *
 * @param url the URL to check; its external string form is what gets matched
 * @return {@code true} if the whole URL string matches the gallery pattern
 */
@Override
public boolean canRip(URL url) {
// Example of an accepted URL:
//http://newsfilter.org/gallery/he-doubted-she-would-fuck-on-cam-happy-to-be-proven-wrong-216799
// Pattern: http or https, an optional subdomain built only from the letters
// 'w' and 'm' followed by a dot (e.g. "www." or "m."), then
// newsfilter.org/gallery/ followed by at least one character.
Pattern p = Pattern.compile("^https?://([wm]+\\.)?newsfilter\\.org/gallery/.+$");
Matcher m = p.matcher(url.toExternalForm());
// matches() succeeds only if the entire string matches, not just a prefix.
return m.matches();
}
@Override @Override
public URL sanitizeURL(URL url) throws MalformedURLException { public URL sanitizeURL(URL url) throws MalformedURLException {
String u = url.toExternalForm(); String u = url.toExternalForm();
@@ -40,27 +38,15 @@ public class NewsfilterRipper extends AlbumRipper {
} }
@Override @Override
public void rip() throws IOException { public String getGID(URL url) throws MalformedURLException {
String gid = getGID(this.url); Pattern p = Pattern.compile("^https?://([wm]+\\.)?newsfilter\\.org/gallery/([^/]+)$");
String theurl = "http://newsfilter.org/gallery/" + gid; Matcher m = p.matcher(url.toExternalForm());
LOGGER.info("Loading " + theurl); if (m.matches()) {
return m.group(2);
Connection.Response resp = Jsoup.connect(theurl)
.timeout(5000)
.referrer("")
.userAgent(USER_AGENT)
.method(Connection.Method.GET)
.execute();
Document doc = resp.parse();
Elements thumbnails = doc.select("#galleryImages .inner-block img");
for (Element thumb : thumbnails) {
String thumbUrl = thumb.attr("src");
String picUrl = thumbUrl.replace("thumbs/", "");
addURLToDownload(new URL(picUrl));
} }
throw new MalformedURLException(
waitForThreads(); "Expected newsfilter gallery format: http://newsfilter.org/gallery/galleryid" +
" Got: " + url);
} }
@Override @Override
@@ -69,14 +55,30 @@ public class NewsfilterRipper extends AlbumRipper {
} }
@Override @Override
public String getGID(URL url) throws MalformedURLException { protected String getDomain() {
Pattern p = Pattern.compile("^https?://([wm]+\\.)?newsfilter\\.org/gallery/([^/]+)$"); return DOMAIN;
Matcher m = p.matcher(url.toExternalForm()); }
if (m.matches()) {
return m.group(2); @Override
// Returns the Document obtained from Http.url(url).get() for this ripper's
// `url` (not declared in this method; presumably a field inherited from the
// ripper base class -- confirm).
// NOTE(review): Http is the project's com.rarchives.ripme.utils.Http helper;
// presumably it issues an HTTP GET and parses the response -- confirm there.
// An IOException from that call propagates to the caller.
protected Document getFirstPage() throws IOException {
return Http.url(url).get();
}
@Override
protected List<String> getURLsFromPage(Document page) {
List<String> imgURLs = new ArrayList<>();
Elements thumbnails = page.select("#galleryImages .inner-block img");
for (Element thumb : thumbnails) {
String thumbUrl = thumb.attr("src");
String picUrl = thumbUrl.replace("thumbs/", "");
// use HTTP instead of HTTPS (less headaches)
imgURLs.add(picUrl.replaceFirst("https://", "http://"));
} }
throw new MalformedURLException( return imgURLs;
"Expected newsfilter gallery format: http://newsfilter.org/gallery/galleryid" + }
" Got: " + url);
@Override
protected void downloadURL(URL url, int index) {
addURLToDownload(url, getPrefix(index));
} }
} }

View File

@@ -6,5 +6,9 @@ import java.net.URL;
import com.rarchives.ripme.ripper.rippers.NewsfilterRipper; import com.rarchives.ripme.ripper.rippers.NewsfilterRipper;
public class NewsfilterRipperTest extends RippersTest { public class NewsfilterRipperTest extends RippersTest {
// TODO add a test
/**
 * Builds a NewsfilterRipper for a sample newsfilter.org gallery URL and hands
 * it to testRipper(...).
 *
 * NOTE(review): testRipper is not defined in this file; presumably it is
 * inherited from RippersTest and performs the actual rip and checks -- confirm.
 * NOTE(review): this test depends on a fixed gallery URL, so it presumably
 * needs that gallery to stay available -- confirm how testRipper behaves
 * when it is not.
 *
 * @throws IOException if constructing the URL or the ripper fails
 */
public void testNewsfilterRip() throws IOException {
NewsfilterRipper ripper = new NewsfilterRipper(new URL("http://newsfilter.org/gallery/he-doubted-she-would-fuck-on-cam-happy-to-be-proven-wrong-216799"));
testRipper(ripper);
}
} }