mirror of
https://github.com/RipMeApp/ripme.git
synced 2025-09-02 10:23:47 +02:00
Implement caching of the first page for every ripper
This commit is contained in:
@@ -32,6 +32,7 @@ public abstract class AbstractHTMLRipper extends AbstractRipper {
|
|||||||
private final Map<URL, File> itemsPending = Collections.synchronizedMap(new HashMap<>());
|
private final Map<URL, File> itemsPending = Collections.synchronizedMap(new HashMap<>());
|
||||||
private final Map<URL, Path> itemsCompleted = Collections.synchronizedMap(new HashMap<>());
|
private final Map<URL, Path> itemsCompleted = Collections.synchronizedMap(new HashMap<>());
|
||||||
private final Map<URL, String> itemsErrored = Collections.synchronizedMap(new HashMap<>());
|
private final Map<URL, String> itemsErrored = Collections.synchronizedMap(new HashMap<>());
|
||||||
|
Document cachedFirstPage;
|
||||||
|
|
||||||
protected AbstractHTMLRipper(URL url) throws IOException {
|
protected AbstractHTMLRipper(URL url) throws IOException {
|
||||||
super(url);
|
super(url);
|
||||||
@@ -41,6 +42,14 @@ public abstract class AbstractHTMLRipper extends AbstractRipper {
|
|||||||
public abstract String getHost();
|
public abstract String getHost();
|
||||||
|
|
||||||
protected abstract Document getFirstPage() throws IOException;
|
protected abstract Document getFirstPage() throws IOException;
|
||||||
|
|
||||||
|
/**
 * Returns the first page of this rip, fetching it at most once per successful load.
 *
 * <p>While {@code cachedFirstPage} is {@code null}, this delegates to
 * {@link #getFirstPage()} and stores the result in {@code cachedFirstPage};
 * once a non-null document is stored, that same instance is returned without
 * calling {@link #getFirstPage()} again.
 *
 * <p>If {@link #getFirstPage()} throws, nothing is stored and the next call
 * retries the fetch. A {@code null} result is likewise not remembered, so it
 * also triggers a new fetch on the next call.
 *
 * <p>NOTE(review): no synchronization is performed in this method; confirm
 * whether callers can reach it from more than one thread.
 *
 * @return the stored first-page document, or whatever {@link #getFirstPage()}
 *         returned on this call
 * @throws IOException if {@link #getFirstPage()} fails while loading the page
 */
protected Document getCachedFirstPage() throws IOException {
|
||||||
|
if (cachedFirstPage == null) {
|
||||||
|
cachedFirstPage = getFirstPage();
|
||||||
|
}
|
||||||
|
return cachedFirstPage;
|
||||||
|
}
|
||||||
|
|
||||||
public Document getNextPage(Document doc) throws IOException {
|
public Document getNextPage(Document doc) throws IOException {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
@@ -98,7 +107,7 @@ public abstract class AbstractHTMLRipper extends AbstractRipper {
|
|||||||
int textindex = 0;
|
int textindex = 0;
|
||||||
LOGGER.info("Retrieving " + this.url);
|
LOGGER.info("Retrieving " + this.url);
|
||||||
sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm());
|
sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm());
|
||||||
Document doc = getFirstPage();
|
var doc = getCachedFirstPage();
|
||||||
|
|
||||||
if (hasQueueSupport() && pageContainsAlbums(this.url)) {
|
if (hasQueueSupport() && pageContainsAlbums(this.url)) {
|
||||||
List<String> urls = getAlbumsToQueue(doc);
|
List<String> urls = getAlbumsToQueue(doc);
|
||||||
|
@@ -49,7 +49,7 @@ public class AerisdiesRipper extends AbstractHTMLRipper {
|
|||||||
@Override
|
@Override
|
||||||
public String getAlbumTitle(URL url) throws MalformedURLException {
|
public String getAlbumTitle(URL url) throws MalformedURLException {
|
||||||
try {
|
try {
|
||||||
Element el = getFirstPage().select(".headtext").first();
|
Element el = getCachedFirstPage().select(".headtext").first();
|
||||||
if (el == null) {
|
if (el == null) {
|
||||||
throw new IOException("Unable to get album title");
|
throw new IOException("Unable to get album title");
|
||||||
}
|
}
|
||||||
|
@@ -73,7 +73,7 @@ public class BatoRipper extends AbstractHTMLRipper {
|
|||||||
public String getAlbumTitle(URL url) throws MalformedURLException {
|
public String getAlbumTitle(URL url) throws MalformedURLException {
|
||||||
try {
|
try {
|
||||||
// Attempt to use album title as GID
|
// Attempt to use album title as GID
|
||||||
return getHost() + "_" + getGID(url) + "_" + getFirstPage().select("title").first().text().replaceAll(" ", "_");
|
return getHost() + "_" + getGID(url) + "_" + getCachedFirstPage().select("title").first().text().replaceAll(" ", "_");
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
// Fall back to default album naming convention
|
// Fall back to default album naming convention
|
||||||
LOGGER.info("Unable to find title at " + url);
|
LOGGER.info("Unable to find title at " + url);
|
||||||
|
@@ -104,7 +104,7 @@ public class ChanRipper extends AbstractHTMLRipper {
|
|||||||
public String getAlbumTitle(URL url) throws MalformedURLException {
|
public String getAlbumTitle(URL url) throws MalformedURLException {
|
||||||
try {
|
try {
|
||||||
// Attempt to use album title as GID
|
// Attempt to use album title as GID
|
||||||
Document doc = getFirstPage();
|
Document doc = getCachedFirstPage();
|
||||||
try {
|
try {
|
||||||
String subject = doc.select(".post.op > .postinfo > .subject").first().text();
|
String subject = doc.select(".post.op > .postinfo > .subject").first().text();
|
||||||
return getHost() + "_" + getGID(url) + "_" + subject;
|
return getHost() + "_" + getGID(url) + "_" + subject;
|
||||||
|
@@ -53,7 +53,7 @@ public class CheveretoRipper extends AbstractHTMLRipper {
|
|||||||
public String getAlbumTitle(URL url) throws MalformedURLException {
|
public String getAlbumTitle(URL url) throws MalformedURLException {
|
||||||
try {
|
try {
|
||||||
// Attempt to use album title as GID
|
// Attempt to use album title as GID
|
||||||
Element titleElement = getFirstPage().select("meta[property=og:title]").first();
|
Element titleElement = getCachedFirstPage().select("meta[property=og:title]").first();
|
||||||
String title = titleElement.attr("content");
|
String title = titleElement.attr("content");
|
||||||
title = title.substring(title.lastIndexOf('/') + 1);
|
title = title.substring(title.lastIndexOf('/') + 1);
|
||||||
return getHost() + "_" + title.trim();
|
return getHost() + "_" + title.trim();
|
||||||
|
@@ -23,7 +23,6 @@ import com.rarchives.ripme.utils.Http;
|
|||||||
|
|
||||||
public class EightmusesRipper extends AbstractHTMLRipper {
|
public class EightmusesRipper extends AbstractHTMLRipper {
|
||||||
|
|
||||||
private Document albumDoc = null;
|
|
||||||
private Map<String,String> cookies = new HashMap<>();
|
private Map<String,String> cookies = new HashMap<>();
|
||||||
// TODO put up a wiki page on using maps to store titles
|
// TODO put up a wiki page on using maps to store titles
|
||||||
// the map for storing the title of each album when downloading sub albums
|
// the map for storing the title of each album when downloading sub albums
|
||||||
@@ -64,7 +63,7 @@ public class EightmusesRipper extends AbstractHTMLRipper {
|
|||||||
public String getAlbumTitle(URL url) throws MalformedURLException {
|
public String getAlbumTitle(URL url) throws MalformedURLException {
|
||||||
try {
|
try {
|
||||||
// Attempt to use album title as GID
|
// Attempt to use album title as GID
|
||||||
Element titleElement = getFirstPage().select("meta[name=description]").first();
|
Element titleElement = getCachedFirstPage().select("meta[name=description]").first();
|
||||||
String title = titleElement.attr("content");
|
String title = titleElement.attr("content");
|
||||||
title = title.replace("A huge collection of free porn comics for adults. Read", "");
|
title = title.replace("A huge collection of free porn comics for adults. Read", "");
|
||||||
title = title.replace("online for free at 8muses.com", "");
|
title = title.replace("online for free at 8muses.com", "");
|
||||||
@@ -78,12 +77,9 @@ public class EightmusesRipper extends AbstractHTMLRipper {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Document getFirstPage() throws IOException {
|
public Document getFirstPage() throws IOException {
|
||||||
if (albumDoc == null) {
|
Response resp = Http.url(url).response();
|
||||||
Response resp = Http.url(url).response();
|
cookies.putAll(resp.cookies());
|
||||||
cookies.putAll(resp.cookies());
|
return resp.parse();
|
||||||
albumDoc = resp.parse();
|
|
||||||
}
|
|
||||||
return albumDoc;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@@ -97,7 +97,7 @@ public class EroShareRipper extends AbstractHTMLRipper {
|
|||||||
if (!is_profile(url)) {
|
if (!is_profile(url)) {
|
||||||
try {
|
try {
|
||||||
// Attempt to use album title as GID
|
// Attempt to use album title as GID
|
||||||
Element titleElement = getFirstPage().select("meta[property=og:title]").first();
|
Element titleElement = getCachedFirstPage().select("meta[property=og:title]").first();
|
||||||
String title = titleElement.attr("content");
|
String title = titleElement.attr("content");
|
||||||
title = title.substring(title.lastIndexOf('/') + 1);
|
title = title.substring(title.lastIndexOf('/') + 1);
|
||||||
return getHost() + "_" + getGID(url) + "_" + title.trim();
|
return getHost() + "_" + getGID(url) + "_" + title.trim();
|
||||||
|
@@ -71,7 +71,7 @@ public class EromeRipper extends AbstractHTMLRipper {
|
|||||||
public String getAlbumTitle(URL url) throws MalformedURLException {
|
public String getAlbumTitle(URL url) throws MalformedURLException {
|
||||||
try {
|
try {
|
||||||
// Attempt to use album title as GID
|
// Attempt to use album title as GID
|
||||||
Element titleElement = getFirstPage().select("meta[property=og:title]").first();
|
Element titleElement = getCachedFirstPage().select("meta[property=og:title]").first();
|
||||||
String title = titleElement.attr("content");
|
String title = titleElement.attr("content");
|
||||||
title = title.substring(title.lastIndexOf('/') + 1);
|
title = title.substring(title.lastIndexOf('/') + 1);
|
||||||
return getHost() + "_" + getGID(url) + "_" + title.trim();
|
return getHost() + "_" + getGID(url) + "_" + title.trim();
|
||||||
|
@@ -20,7 +20,6 @@ import org.jsoup.nodes.Element;
|
|||||||
|
|
||||||
public class FlickrRipper extends AbstractHTMLRipper {
|
public class FlickrRipper extends AbstractHTMLRipper {
|
||||||
|
|
||||||
private Document albumDoc = null;
|
|
||||||
private final DownloadThreadPool flickrThreadPool;
|
private final DownloadThreadPool flickrThreadPool;
|
||||||
|
|
||||||
private enum UrlType {
|
private enum UrlType {
|
||||||
@@ -178,7 +177,7 @@ public class FlickrRipper extends AbstractHTMLRipper {
|
|||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
// Attempt to use album title as GID
|
// Attempt to use album title as GID
|
||||||
Document doc = getFirstPage();
|
Document doc = getCachedFirstPage();
|
||||||
String user = url.toExternalForm();
|
String user = url.toExternalForm();
|
||||||
user = user.substring(user.indexOf("/photos/") + "/photos/".length());
|
user = user.substring(user.indexOf("/photos/") + "/photos/".length());
|
||||||
user = user.substring(0, user.indexOf("/"));
|
user = user.substring(0, user.indexOf("/"));
|
||||||
@@ -230,10 +229,7 @@ public class FlickrRipper extends AbstractHTMLRipper {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Document getFirstPage() throws IOException {
|
public Document getFirstPage() throws IOException {
|
||||||
if (albumDoc == null) {
|
return Http.url(url).get();
|
||||||
albumDoc = Http.url(url).get();
|
|
||||||
}
|
|
||||||
return albumDoc;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@@ -16,8 +16,6 @@ import com.rarchives.ripme.ripper.AbstractHTMLRipper;
|
|||||||
import com.rarchives.ripme.utils.Http;
|
import com.rarchives.ripme.utils.Http;
|
||||||
|
|
||||||
public class GirlsOfDesireRipper extends AbstractHTMLRipper {
|
public class GirlsOfDesireRipper extends AbstractHTMLRipper {
|
||||||
// Current HTML document
|
|
||||||
private Document albumDoc = null;
|
|
||||||
|
|
||||||
public GirlsOfDesireRipper(URL url) throws IOException {
|
public GirlsOfDesireRipper(URL url) throws IOException {
|
||||||
super(url);
|
super(url);
|
||||||
@@ -35,7 +33,7 @@ public class GirlsOfDesireRipper extends AbstractHTMLRipper {
|
|||||||
public String getAlbumTitle(URL url) throws MalformedURLException {
|
public String getAlbumTitle(URL url) throws MalformedURLException {
|
||||||
try {
|
try {
|
||||||
// Attempt to use album title as GID
|
// Attempt to use album title as GID
|
||||||
Document doc = getFirstPage();
|
Document doc = getCachedFirstPage();
|
||||||
Elements elems = doc.select(".albumName");
|
Elements elems = doc.select(".albumName");
|
||||||
return getHost() + "_" + elems.first().text();
|
return getHost() + "_" + elems.first().text();
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
@@ -64,10 +62,7 @@ public class GirlsOfDesireRipper extends AbstractHTMLRipper {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Document getFirstPage() throws IOException {
|
public Document getFirstPage() throws IOException {
|
||||||
if (albumDoc == null) {
|
return Http.url(url).get();
|
||||||
albumDoc = Http.url(url).get();
|
|
||||||
}
|
|
||||||
return albumDoc;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@@ -51,7 +51,7 @@ public class HbrowseRipper extends AbstractHTMLRipper {
|
|||||||
@Override
|
@Override
|
||||||
public String getAlbumTitle(URL url) throws MalformedURLException {
|
public String getAlbumTitle(URL url) throws MalformedURLException {
|
||||||
try {
|
try {
|
||||||
Document doc = getFirstPage();
|
Document doc = getCachedFirstPage();
|
||||||
String title = doc.select("div[id=main] > table.listTable > tbody > tr > td.listLong").first().text();
|
String title = doc.select("div[id=main] > table.listTable > tbody > tr > td.listLong").first().text();
|
||||||
return getHost() + "_" + title + "_" + getGID(url);
|
return getHost() + "_" + title + "_" + getGID(url);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
|
@@ -61,7 +61,7 @@ public class HentaifoxRipper extends AbstractHTMLRipper {
|
|||||||
@Override
|
@Override
|
||||||
public String getAlbumTitle(URL url) throws MalformedURLException {
|
public String getAlbumTitle(URL url) throws MalformedURLException {
|
||||||
try {
|
try {
|
||||||
Document doc = getFirstPage();
|
Document doc = getCachedFirstPage();
|
||||||
String title = doc.select("div.info > h1").first().text();
|
String title = doc.select("div.info > h1").first().text();
|
||||||
return getHost() + "_" + title + "_" + getGID(url);
|
return getHost() + "_" + title + "_" + getGID(url);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
|
@@ -19,9 +19,6 @@ import org.jsoup.select.Elements;
|
|||||||
|
|
||||||
public class ImagebamRipper extends AbstractHTMLRipper {
|
public class ImagebamRipper extends AbstractHTMLRipper {
|
||||||
|
|
||||||
// Current HTML document
|
|
||||||
private Document albumDoc = null;
|
|
||||||
|
|
||||||
// Thread pool for finding direct image links from "image" pages (html)
|
// Thread pool for finding direct image links from "image" pages (html)
|
||||||
private DownloadThreadPool imagebamThreadPool = new DownloadThreadPool("imagebam");
|
private DownloadThreadPool imagebamThreadPool = new DownloadThreadPool("imagebam");
|
||||||
@Override
|
@Override
|
||||||
@@ -61,10 +58,7 @@ public class ImagebamRipper extends AbstractHTMLRipper {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Document getFirstPage() throws IOException {
|
public Document getFirstPage() throws IOException {
|
||||||
if (albumDoc == null) {
|
return Http.url(url).get();
|
||||||
albumDoc = Http.url(url).get();
|
|
||||||
}
|
|
||||||
return albumDoc;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@@ -99,7 +93,7 @@ public class ImagebamRipper extends AbstractHTMLRipper {
|
|||||||
public String getAlbumTitle(URL url) throws MalformedURLException {
|
public String getAlbumTitle(URL url) throws MalformedURLException {
|
||||||
try {
|
try {
|
||||||
// Attempt to use album title as GID
|
// Attempt to use album title as GID
|
||||||
Elements elems = getFirstPage().select("[id=gallery-name]");
|
Elements elems = getCachedFirstPage().select("[id=gallery-name]");
|
||||||
String title = elems.first().text();
|
String title = elems.first().text();
|
||||||
LOGGER.info("Title text: '" + title + "'");
|
LOGGER.info("Title text: '" + title + "'");
|
||||||
if (StringUtils.isNotBlank(title)) {
|
if (StringUtils.isNotBlank(title)) {
|
||||||
|
@@ -17,7 +17,6 @@ import com.rarchives.ripme.utils.Http;
|
|||||||
|
|
||||||
public class ImagefapRipper extends AbstractHTMLRipper {
|
public class ImagefapRipper extends AbstractHTMLRipper {
|
||||||
|
|
||||||
private Document albumDoc = null;
|
|
||||||
private boolean isNewAlbumType = false;
|
private boolean isNewAlbumType = false;
|
||||||
|
|
||||||
private int callsMade = 0;
|
private int callsMade = 0;
|
||||||
@@ -109,10 +108,7 @@ public class ImagefapRipper extends AbstractHTMLRipper {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Document getFirstPage() throws IOException {
|
public Document getFirstPage() throws IOException {
|
||||||
if (albumDoc == null) {
|
return getPageWithRetries(url);
|
||||||
albumDoc = getPageWithRetries(url);
|
|
||||||
}
|
|
||||||
return albumDoc;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@@ -162,7 +158,7 @@ public class ImagefapRipper extends AbstractHTMLRipper {
|
|||||||
public String getAlbumTitle(URL url) throws MalformedURLException {
|
public String getAlbumTitle(URL url) throws MalformedURLException {
|
||||||
try {
|
try {
|
||||||
// Attempt to use album title as GID
|
// Attempt to use album title as GID
|
||||||
String title = getFirstPage().title();
|
String title = getCachedFirstPage().title();
|
||||||
title = title.replace("Porn Pics & Porn GIFs", "");
|
title = title.replace("Porn Pics & Porn GIFs", "");
|
||||||
title = title.replace(" ", "_");
|
title = title.replace(" ", "_");
|
||||||
String toReturn = getHost() + "_" + title + "_" + getGID(url);
|
String toReturn = getHost() + "_" + title + "_" + getGID(url);
|
||||||
|
@@ -34,7 +34,7 @@ public class ViewcomicRipper extends AbstractHTMLRipper {
|
|||||||
public String getAlbumTitle(URL url) throws MalformedURLException {
|
public String getAlbumTitle(URL url) throws MalformedURLException {
|
||||||
try {
|
try {
|
||||||
// Attempt to use album title as GID
|
// Attempt to use album title as GID
|
||||||
String titleText = getFirstPage().select("title").first().text();
|
String titleText = getCachedFirstPage().select("title").first().text();
|
||||||
String title = titleText.replace("Viewcomic reading comics online for free", "");
|
String title = titleText.replace("Viewcomic reading comics online for free", "");
|
||||||
title = title.replace("_", "");
|
title = title.replace("_", "");
|
||||||
title = title.replace("|", "");
|
title = title.replace("|", "");
|
||||||
|
@@ -220,7 +220,7 @@ public class XhamsterRipper extends AbstractHTMLRipper {
|
|||||||
public String getAlbumTitle(URL url) throws MalformedURLException {
|
public String getAlbumTitle(URL url) throws MalformedURLException {
|
||||||
try {
|
try {
|
||||||
// Attempt to use album title and username as GID
|
// Attempt to use album title and username as GID
|
||||||
Document doc = getFirstPage();
|
Document doc = getCachedFirstPage();
|
||||||
Element user = doc.select("a.author").first();
|
Element user = doc.select("a.author").first();
|
||||||
String username = user.text();
|
String username = user.text();
|
||||||
String path = url.getPath();
|
String path = url.getPath();
|
||||||
|
@@ -19,7 +19,6 @@ import com.rarchives.ripme.utils.Http;
|
|||||||
|
|
||||||
public class ZizkiRipper extends AbstractHTMLRipper {
|
public class ZizkiRipper extends AbstractHTMLRipper {
|
||||||
|
|
||||||
private Document albumDoc = null;
|
|
||||||
private Map<String,String> cookies = new HashMap<>();
|
private Map<String,String> cookies = new HashMap<>();
|
||||||
|
|
||||||
public ZizkiRipper(URL url) throws IOException {
|
public ZizkiRipper(URL url) throws IOException {
|
||||||
@@ -49,10 +48,10 @@ public class ZizkiRipper extends AbstractHTMLRipper {
|
|||||||
public String getAlbumTitle(URL url) throws MalformedURLException {
|
public String getAlbumTitle(URL url) throws MalformedURLException {
|
||||||
try {
|
try {
|
||||||
// Attempt to use album title as GID
|
// Attempt to use album title as GID
|
||||||
Element titleElement = getFirstPage().select("h1.title").first();
|
Element titleElement = getCachedFirstPage().select("h1.title").first();
|
||||||
String title = titleElement.text();
|
String title = titleElement.text();
|
||||||
|
|
||||||
Element authorSpan = getFirstPage().select("span[class=creator]").first();
|
Element authorSpan = getCachedFirstPage().select("span[class=creator]").first();
|
||||||
String author = authorSpan.select("a").first().text();
|
String author = authorSpan.select("a").first().text();
|
||||||
LOGGER.debug("Author: " + author);
|
LOGGER.debug("Author: " + author);
|
||||||
return getHost() + "_" + author + "_" + title.trim();
|
return getHost() + "_" + author + "_" + title.trim();
|
||||||
@@ -65,12 +64,9 @@ public class ZizkiRipper extends AbstractHTMLRipper {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Document getFirstPage() throws IOException {
|
public Document getFirstPage() throws IOException {
|
||||||
if (albumDoc == null) {
|
Response resp = Http.url(url).response();
|
||||||
Response resp = Http.url(url).response();
|
cookies.putAll(resp.cookies());
|
||||||
cookies.putAll(resp.cookies());
|
return resp.parse();
|
||||||
albumDoc = resp.parse();
|
|
||||||
}
|
|
||||||
return albumDoc;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
Reference in New Issue
Block a user