mirror of
https://github.com/RipMeApp/ripme.git
synced 2025-09-02 10:23:47 +02:00
Implement caching of the first page for every ripper
This commit is contained in:
@@ -32,6 +32,7 @@ public abstract class AbstractHTMLRipper extends AbstractRipper {
|
||||
private final Map<URL, File> itemsPending = Collections.synchronizedMap(new HashMap<>());
|
||||
private final Map<URL, Path> itemsCompleted = Collections.synchronizedMap(new HashMap<>());
|
||||
private final Map<URL, String> itemsErrored = Collections.synchronizedMap(new HashMap<>());
|
||||
Document cachedFirstPage;
|
||||
|
||||
protected AbstractHTMLRipper(URL url) throws IOException {
|
||||
super(url);
|
||||
@@ -41,6 +42,14 @@ public abstract class AbstractHTMLRipper extends AbstractRipper {
|
||||
public abstract String getHost();
|
||||
|
||||
protected abstract Document getFirstPage() throws IOException;
|
||||
|
||||
/**
 * Returns the first page, fetching it at most once per successful (non-null) fetch.
 *
 * While {@code cachedFirstPage} is {@code null}, this delegates to {@link #getFirstPage()} and
 * stores the result in {@code cachedFirstPage}; once a non-null document has been stored,
 * later calls return it without calling {@link #getFirstPage()} again. If
 * {@link #getFirstPage()} returns {@code null}, nothing is effectively cached and the next
 * call fetches again.
 *
 * NOTE(review): the null check and assignment are not synchronized in this method — confirm
 * whether callers can reach it from more than one thread.
 *
 * @return the stored first-page document, or the freshly fetched one on the first call
 * @throws IOException if {@link #getFirstPage()} throws while fetching the page
 */
protected Document getCachedFirstPage() throws IOException {
|
||||
if (cachedFirstPage == null) {
|
||||
cachedFirstPage = getFirstPage();
|
||||
}
|
||||
return cachedFirstPage;
|
||||
}
|
||||
|
||||
/**
 * Default implementation: ignores {@code doc} and always returns {@code null}.
 *
 * NOTE(review): presumably concrete rippers override this to return the page that follows
 * {@code doc}, with {@code null} meaning "no further page" — confirm against the callers.
 *
 * @param doc the current page; unused by this default implementation
 * @return always {@code null} here
 * @throws IOException never thrown by this default implementation; declared so overrides may throw it
 */
public Document getNextPage(Document doc) throws IOException {
|
||||
return null;
|
||||
}
|
||||
@@ -98,7 +107,7 @@ public abstract class AbstractHTMLRipper extends AbstractRipper {
|
||||
int textindex = 0;
|
||||
LOGGER.info("Retrieving " + this.url);
|
||||
sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm());
|
||||
Document doc = getFirstPage();
|
||||
var doc = getCachedFirstPage();
|
||||
|
||||
if (hasQueueSupport() && pageContainsAlbums(this.url)) {
|
||||
List<String> urls = getAlbumsToQueue(doc);
|
||||
|
@@ -49,7 +49,7 @@ public class AerisdiesRipper extends AbstractHTMLRipper {
|
||||
@Override
|
||||
public String getAlbumTitle(URL url) throws MalformedURLException {
|
||||
try {
|
||||
Element el = getFirstPage().select(".headtext").first();
|
||||
Element el = getCachedFirstPage().select(".headtext").first();
|
||||
if (el == null) {
|
||||
throw new IOException("Unable to get album title");
|
||||
}
|
||||
|
@@ -73,7 +73,7 @@ public class BatoRipper extends AbstractHTMLRipper {
|
||||
public String getAlbumTitle(URL url) throws MalformedURLException {
|
||||
try {
|
||||
// Attempt to use album title as GID
|
||||
return getHost() + "_" + getGID(url) + "_" + getFirstPage().select("title").first().text().replaceAll(" ", "_");
|
||||
return getHost() + "_" + getGID(url) + "_" + getCachedFirstPage().select("title").first().text().replaceAll(" ", "_");
|
||||
} catch (IOException e) {
|
||||
// Fall back to default album naming convention
|
||||
LOGGER.info("Unable to find title at " + url);
|
||||
|
@@ -104,7 +104,7 @@ public class ChanRipper extends AbstractHTMLRipper {
|
||||
public String getAlbumTitle(URL url) throws MalformedURLException {
|
||||
try {
|
||||
// Attempt to use album title as GID
|
||||
Document doc = getFirstPage();
|
||||
Document doc = getCachedFirstPage();
|
||||
try {
|
||||
String subject = doc.select(".post.op > .postinfo > .subject").first().text();
|
||||
return getHost() + "_" + getGID(url) + "_" + subject;
|
||||
|
@@ -53,7 +53,7 @@ public class CheveretoRipper extends AbstractHTMLRipper {
|
||||
public String getAlbumTitle(URL url) throws MalformedURLException {
|
||||
try {
|
||||
// Attempt to use album title as GID
|
||||
Element titleElement = getFirstPage().select("meta[property=og:title]").first();
|
||||
Element titleElement = getCachedFirstPage().select("meta[property=og:title]").first();
|
||||
String title = titleElement.attr("content");
|
||||
title = title.substring(title.lastIndexOf('/') + 1);
|
||||
return getHost() + "_" + title.trim();
|
||||
|
@@ -23,7 +23,6 @@ import com.rarchives.ripme.utils.Http;
|
||||
|
||||
public class EightmusesRipper extends AbstractHTMLRipper {
|
||||
|
||||
private Document albumDoc = null;
|
||||
private Map<String,String> cookies = new HashMap<>();
|
||||
// TODO put up a wiki page on using maps to store titles
|
||||
// the map for storing the title of each album when downloading sub albums
|
||||
@@ -64,7 +63,7 @@ public class EightmusesRipper extends AbstractHTMLRipper {
|
||||
public String getAlbumTitle(URL url) throws MalformedURLException {
|
||||
try {
|
||||
// Attempt to use album title as GID
|
||||
Element titleElement = getFirstPage().select("meta[name=description]").first();
|
||||
Element titleElement = getCachedFirstPage().select("meta[name=description]").first();
|
||||
String title = titleElement.attr("content");
|
||||
title = title.replace("A huge collection of free porn comics for adults. Read", "");
|
||||
title = title.replace("online for free at 8muses.com", "");
|
||||
@@ -78,12 +77,9 @@ public class EightmusesRipper extends AbstractHTMLRipper {
|
||||
|
||||
@Override
|
||||
public Document getFirstPage() throws IOException {
|
||||
if (albumDoc == null) {
|
||||
Response resp = Http.url(url).response();
|
||||
cookies.putAll(resp.cookies());
|
||||
albumDoc = resp.parse();
|
||||
}
|
||||
return albumDoc;
|
||||
Response resp = Http.url(url).response();
|
||||
cookies.putAll(resp.cookies());
|
||||
return resp.parse();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@@ -97,7 +97,7 @@ public class EroShareRipper extends AbstractHTMLRipper {
|
||||
if (!is_profile(url)) {
|
||||
try {
|
||||
// Attempt to use album title as GID
|
||||
Element titleElement = getFirstPage().select("meta[property=og:title]").first();
|
||||
Element titleElement = getCachedFirstPage().select("meta[property=og:title]").first();
|
||||
String title = titleElement.attr("content");
|
||||
title = title.substring(title.lastIndexOf('/') + 1);
|
||||
return getHost() + "_" + getGID(url) + "_" + title.trim();
|
||||
|
@@ -71,7 +71,7 @@ public class EromeRipper extends AbstractHTMLRipper {
|
||||
public String getAlbumTitle(URL url) throws MalformedURLException {
|
||||
try {
|
||||
// Attempt to use album title as GID
|
||||
Element titleElement = getFirstPage().select("meta[property=og:title]").first();
|
||||
Element titleElement = getCachedFirstPage().select("meta[property=og:title]").first();
|
||||
String title = titleElement.attr("content");
|
||||
title = title.substring(title.lastIndexOf('/') + 1);
|
||||
return getHost() + "_" + getGID(url) + "_" + title.trim();
|
||||
|
@@ -20,7 +20,6 @@ import org.jsoup.nodes.Element;
|
||||
|
||||
public class FlickrRipper extends AbstractHTMLRipper {
|
||||
|
||||
private Document albumDoc = null;
|
||||
private final DownloadThreadPool flickrThreadPool;
|
||||
|
||||
private enum UrlType {
|
||||
@@ -178,7 +177,7 @@ public class FlickrRipper extends AbstractHTMLRipper {
|
||||
}
|
||||
try {
|
||||
// Attempt to use album title as GID
|
||||
Document doc = getFirstPage();
|
||||
Document doc = getCachedFirstPage();
|
||||
String user = url.toExternalForm();
|
||||
user = user.substring(user.indexOf("/photos/") + "/photos/".length());
|
||||
user = user.substring(0, user.indexOf("/"));
|
||||
@@ -230,10 +229,7 @@ public class FlickrRipper extends AbstractHTMLRipper {
|
||||
|
||||
@Override
|
||||
public Document getFirstPage() throws IOException {
|
||||
if (albumDoc == null) {
|
||||
albumDoc = Http.url(url).get();
|
||||
}
|
||||
return albumDoc;
|
||||
return Http.url(url).get();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@@ -16,8 +16,6 @@ import com.rarchives.ripme.ripper.AbstractHTMLRipper;
|
||||
import com.rarchives.ripme.utils.Http;
|
||||
|
||||
public class GirlsOfDesireRipper extends AbstractHTMLRipper {
|
||||
// Current HTML document
|
||||
private Document albumDoc = null;
|
||||
|
||||
public GirlsOfDesireRipper(URL url) throws IOException {
|
||||
super(url);
|
||||
@@ -35,7 +33,7 @@ public class GirlsOfDesireRipper extends AbstractHTMLRipper {
|
||||
public String getAlbumTitle(URL url) throws MalformedURLException {
|
||||
try {
|
||||
// Attempt to use album title as GID
|
||||
Document doc = getFirstPage();
|
||||
Document doc = getCachedFirstPage();
|
||||
Elements elems = doc.select(".albumName");
|
||||
return getHost() + "_" + elems.first().text();
|
||||
} catch (Exception e) {
|
||||
@@ -64,10 +62,7 @@ public class GirlsOfDesireRipper extends AbstractHTMLRipper {
|
||||
|
||||
@Override
|
||||
public Document getFirstPage() throws IOException {
|
||||
if (albumDoc == null) {
|
||||
albumDoc = Http.url(url).get();
|
||||
}
|
||||
return albumDoc;
|
||||
return Http.url(url).get();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@@ -51,7 +51,7 @@ public class HbrowseRipper extends AbstractHTMLRipper {
|
||||
@Override
|
||||
public String getAlbumTitle(URL url) throws MalformedURLException {
|
||||
try {
|
||||
Document doc = getFirstPage();
|
||||
Document doc = getCachedFirstPage();
|
||||
String title = doc.select("div[id=main] > table.listTable > tbody > tr > td.listLong").first().text();
|
||||
return getHost() + "_" + title + "_" + getGID(url);
|
||||
} catch (Exception e) {
|
||||
|
@@ -61,7 +61,7 @@ public class HentaifoxRipper extends AbstractHTMLRipper {
|
||||
@Override
|
||||
public String getAlbumTitle(URL url) throws MalformedURLException {
|
||||
try {
|
||||
Document doc = getFirstPage();
|
||||
Document doc = getCachedFirstPage();
|
||||
String title = doc.select("div.info > h1").first().text();
|
||||
return getHost() + "_" + title + "_" + getGID(url);
|
||||
} catch (Exception e) {
|
||||
|
@@ -19,9 +19,6 @@ import org.jsoup.select.Elements;
|
||||
|
||||
public class ImagebamRipper extends AbstractHTMLRipper {
|
||||
|
||||
// Current HTML document
|
||||
private Document albumDoc = null;
|
||||
|
||||
// Thread pool for finding direct image links from "image" pages (html)
|
||||
private DownloadThreadPool imagebamThreadPool = new DownloadThreadPool("imagebam");
|
||||
@Override
|
||||
@@ -61,10 +58,7 @@ public class ImagebamRipper extends AbstractHTMLRipper {
|
||||
|
||||
@Override
|
||||
public Document getFirstPage() throws IOException {
|
||||
if (albumDoc == null) {
|
||||
albumDoc = Http.url(url).get();
|
||||
}
|
||||
return albumDoc;
|
||||
return Http.url(url).get();
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -99,7 +93,7 @@ public class ImagebamRipper extends AbstractHTMLRipper {
|
||||
public String getAlbumTitle(URL url) throws MalformedURLException {
|
||||
try {
|
||||
// Attempt to use album title as GID
|
||||
Elements elems = getFirstPage().select("[id=gallery-name]");
|
||||
Elements elems = getCachedFirstPage().select("[id=gallery-name]");
|
||||
String title = elems.first().text();
|
||||
LOGGER.info("Title text: '" + title + "'");
|
||||
if (StringUtils.isNotBlank(title)) {
|
||||
|
@@ -17,7 +17,6 @@ import com.rarchives.ripme.utils.Http;
|
||||
|
||||
public class ImagefapRipper extends AbstractHTMLRipper {
|
||||
|
||||
private Document albumDoc = null;
|
||||
private boolean isNewAlbumType = false;
|
||||
|
||||
private int callsMade = 0;
|
||||
@@ -109,10 +108,7 @@ public class ImagefapRipper extends AbstractHTMLRipper {
|
||||
|
||||
@Override
|
||||
public Document getFirstPage() throws IOException {
|
||||
if (albumDoc == null) {
|
||||
albumDoc = getPageWithRetries(url);
|
||||
}
|
||||
return albumDoc;
|
||||
return getPageWithRetries(url);
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -162,7 +158,7 @@ public class ImagefapRipper extends AbstractHTMLRipper {
|
||||
public String getAlbumTitle(URL url) throws MalformedURLException {
|
||||
try {
|
||||
// Attempt to use album title as GID
|
||||
String title = getFirstPage().title();
|
||||
String title = getCachedFirstPage().title();
|
||||
title = title.replace("Porn Pics & Porn GIFs", "");
|
||||
title = title.replace(" ", "_");
|
||||
String toReturn = getHost() + "_" + title + "_" + getGID(url);
|
||||
|
@@ -34,7 +34,7 @@ public class ViewcomicRipper extends AbstractHTMLRipper {
|
||||
public String getAlbumTitle(URL url) throws MalformedURLException {
|
||||
try {
|
||||
// Attempt to use album title as GID
|
||||
String titleText = getFirstPage().select("title").first().text();
|
||||
String titleText = getCachedFirstPage().select("title").first().text();
|
||||
String title = titleText.replace("Viewcomic reading comics online for free", "");
|
||||
title = title.replace("_", "");
|
||||
title = title.replace("|", "");
|
||||
|
@@ -220,7 +220,7 @@ public class XhamsterRipper extends AbstractHTMLRipper {
|
||||
public String getAlbumTitle(URL url) throws MalformedURLException {
|
||||
try {
|
||||
// Attempt to use album title and username as GID
|
||||
Document doc = getFirstPage();
|
||||
Document doc = getCachedFirstPage();
|
||||
Element user = doc.select("a.author").first();
|
||||
String username = user.text();
|
||||
String path = url.getPath();
|
||||
|
@@ -19,7 +19,6 @@ import com.rarchives.ripme.utils.Http;
|
||||
|
||||
public class ZizkiRipper extends AbstractHTMLRipper {
|
||||
|
||||
private Document albumDoc = null;
|
||||
private Map<String,String> cookies = new HashMap<>();
|
||||
|
||||
public ZizkiRipper(URL url) throws IOException {
|
||||
@@ -49,10 +48,10 @@ public class ZizkiRipper extends AbstractHTMLRipper {
|
||||
public String getAlbumTitle(URL url) throws MalformedURLException {
|
||||
try {
|
||||
// Attempt to use album title as GID
|
||||
Element titleElement = getFirstPage().select("h1.title").first();
|
||||
Element titleElement = getCachedFirstPage().select("h1.title").first();
|
||||
String title = titleElement.text();
|
||||
|
||||
Element authorSpan = getFirstPage().select("span[class=creator]").first();
|
||||
Element authorSpan = getCachedFirstPage().select("span[class=creator]").first();
|
||||
String author = authorSpan.select("a").first().text();
|
||||
LOGGER.debug("Author: " + author);
|
||||
return getHost() + "_" + author + "_" + title.trim();
|
||||
@@ -65,12 +64,9 @@ public class ZizkiRipper extends AbstractHTMLRipper {
|
||||
|
||||
@Override
|
||||
public Document getFirstPage() throws IOException {
|
||||
if (albumDoc == null) {
|
||||
Response resp = Http.url(url).response();
|
||||
cookies.putAll(resp.cookies());
|
||||
albumDoc = resp.parse();
|
||||
}
|
||||
return albumDoc;
|
||||
Response resp = Http.url(url).response();
|
||||
cookies.putAll(resp.cookies());
|
||||
return resp.parse();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
Reference in New Issue
Block a user