mirror of
https://github.com/RipMeApp/ripme.git
synced 2025-09-02 18:33:13 +02:00
Make NewsfilterRipper inherit from AbstractHTMLRipper
Also add a basic unit test for this ripper (covering everything except getGID() and getAlbumTitle()).
This commit is contained in:
@@ -3,9 +3,13 @@ package com.rarchives.ripme.ripper.rippers;
|
|||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.MalformedURLException;
|
import java.net.MalformedURLException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
import java.util.regex.Matcher;
|
import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
|
||||||
|
import com.rarchives.ripme.utils.Http;
|
||||||
import org.jsoup.Connection;
|
import org.jsoup.Connection;
|
||||||
import org.jsoup.Jsoup;
|
import org.jsoup.Jsoup;
|
||||||
import org.jsoup.nodes.Document;
|
import org.jsoup.nodes.Document;
|
||||||
@@ -14,21 +18,15 @@ import org.jsoup.select.Elements;
|
|||||||
|
|
||||||
import com.rarchives.ripme.ripper.AlbumRipper;
|
import com.rarchives.ripme.ripper.AlbumRipper;
|
||||||
|
|
||||||
public class NewsfilterRipper extends AlbumRipper {
|
public class NewsfilterRipper extends AbstractHTMLRipper {
|
||||||
|
|
||||||
private static final String HOST = "newsfilter";
|
private static final String HOST = "newsfilter";
|
||||||
|
private static final String DOMAIN = "newsfilter.org";
|
||||||
|
|
||||||
public NewsfilterRipper(URL url) throws IOException {
|
public NewsfilterRipper(URL url) throws IOException {
|
||||||
super(url);
|
super(url);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean canRip(URL url) {
|
|
||||||
//http://newsfilter.org/gallery/he-doubted-she-would-fuck-on-cam-happy-to-be-proven-wrong-216799
|
|
||||||
Pattern p = Pattern.compile("^https?://([wm]+\\.)?newsfilter\\.org/gallery/.+$");
|
|
||||||
Matcher m = p.matcher(url.toExternalForm());
|
|
||||||
return m.matches();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public URL sanitizeURL(URL url) throws MalformedURLException {
|
public URL sanitizeURL(URL url) throws MalformedURLException {
|
||||||
String u = url.toExternalForm();
|
String u = url.toExternalForm();
|
||||||
@@ -40,27 +38,15 @@ public class NewsfilterRipper extends AlbumRipper {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void rip() throws IOException {
|
public String getGID(URL url) throws MalformedURLException {
|
||||||
String gid = getGID(this.url);
|
Pattern p = Pattern.compile("^https?://([wm]+\\.)?newsfilter\\.org/gallery/([^/]+)$");
|
||||||
String theurl = "http://newsfilter.org/gallery/" + gid;
|
Matcher m = p.matcher(url.toExternalForm());
|
||||||
LOGGER.info("Loading " + theurl);
|
if (m.matches()) {
|
||||||
|
return m.group(2);
|
||||||
Connection.Response resp = Jsoup.connect(theurl)
|
|
||||||
.timeout(5000)
|
|
||||||
.referrer("")
|
|
||||||
.userAgent(USER_AGENT)
|
|
||||||
.method(Connection.Method.GET)
|
|
||||||
.execute();
|
|
||||||
Document doc = resp.parse();
|
|
||||||
|
|
||||||
Elements thumbnails = doc.select("#galleryImages .inner-block img");
|
|
||||||
for (Element thumb : thumbnails) {
|
|
||||||
String thumbUrl = thumb.attr("src");
|
|
||||||
String picUrl = thumbUrl.replace("thumbs/", "");
|
|
||||||
addURLToDownload(new URL(picUrl));
|
|
||||||
}
|
}
|
||||||
|
throw new MalformedURLException(
|
||||||
waitForThreads();
|
"Expected newsfilter gallery format: http://newsfilter.org/gallery/galleryid" +
|
||||||
|
" Got: " + url);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@@ -69,14 +55,30 @@ public class NewsfilterRipper extends AlbumRipper {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getGID(URL url) throws MalformedURLException {
|
protected String getDomain() {
|
||||||
Pattern p = Pattern.compile("^https?://([wm]+\\.)?newsfilter\\.org/gallery/([^/]+)$");
|
return DOMAIN;
|
||||||
Matcher m = p.matcher(url.toExternalForm());
|
}
|
||||||
if (m.matches()) {
|
|
||||||
return m.group(2);
|
@Override
|
||||||
|
protected Document getFirstPage() throws IOException {
|
||||||
|
return Http.url(url).get();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected List<String> getURLsFromPage(Document page) {
|
||||||
|
List<String> imgURLs = new ArrayList<>();
|
||||||
|
Elements thumbnails = page.select("#galleryImages .inner-block img");
|
||||||
|
for (Element thumb : thumbnails) {
|
||||||
|
String thumbUrl = thumb.attr("src");
|
||||||
|
String picUrl = thumbUrl.replace("thumbs/", "");
|
||||||
|
// use HTTP instead of HTTPS (less headaches)
|
||||||
|
imgURLs.add(picUrl.replaceFirst("https://", "http://"));
|
||||||
}
|
}
|
||||||
throw new MalformedURLException(
|
return imgURLs;
|
||||||
"Expected newsfilter gallery format: http://newsfilter.org/gallery/galleryid" +
|
}
|
||||||
" Got: " + url);
|
|
||||||
|
@Override
|
||||||
|
protected void downloadURL(URL url, int index) {
|
||||||
|
addURLToDownload(url, getPrefix(index));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -6,5 +6,9 @@ import java.net.URL;
|
|||||||
import com.rarchives.ripme.ripper.rippers.NewsfilterRipper;
|
import com.rarchives.ripme.ripper.rippers.NewsfilterRipper;
|
||||||
|
|
||||||
public class NewsfilterRipperTest extends RippersTest {
|
public class NewsfilterRipperTest extends RippersTest {
|
||||||
// TODO add a test
|
|
||||||
|
public void testNewsfilterRip() throws IOException {
|
||||||
|
NewsfilterRipper ripper = new NewsfilterRipper(new URL("http://newsfilter.org/gallery/he-doubted-she-would-fuck-on-cam-happy-to-be-proven-wrong-216799"));
|
||||||
|
testRipper(ripper);
|
||||||
|
}
|
||||||
}
|
}
|
Reference in New Issue
Block a user