1
0
mirror of https://github.com/RipMeApp/ripme.git synced 2025-02-21 06:32:42 +01:00

Merge pull request #1778 from Frankenst1/master

Fix imagefap ripper to take rate limit into account.
This commit is contained in:
cyian-1756 2020-10-29 16:08:52 +00:00 committed by GitHub
commit d1c11fab97
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -12,6 +12,7 @@ import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.ui.RipStatusMessage.STATUS;
import com.rarchives.ripme.utils.Http;
public class ImagefapRipper extends AbstractHTMLRipper {
@ -19,6 +20,18 @@ public class ImagefapRipper extends AbstractHTMLRipper {
// First album page; stays null until getFirstPage() has loaded it, then reused.
private Document albumDoc = null;
private boolean isNewAlbumType = false;
// Number of page requests issued so far; incremented in getPageWithRetries() and
// read by checkRateLimit() for debug logging only.
private int callsMade = 0;
// System.nanoTime() reading taken when this instance is created; baseline for checkRateLimit().
private long startTime = System.nanoTime();
// Number of retries allowed after the site's rate-limit block page is detected.
private static final int RETRY_LIMIT = 10;
// Hourly request budget that the sleep times below are derived from.
private static final int RATE_LIMIT_HOUR = 1000;
// All sleep times are in milliseconds
// One hour spread evenly over RATE_LIMIT_HOUR requests.
private static final int PAGE_SLEEP_TIME = 60 * 60 * 1000 / RATE_LIMIT_HOUR;
private static final int IMAGE_SLEEP_TIME = 60 * 60 * 1000 / RATE_LIMIT_HOUR;
// A block is assumed to last one hour. The wait between retries is one hour divided by
// (RETRY_LIMIT - 1), so the first RETRY_LIMIT - 1 waits cover the hour and the final
// retry happens once after the hour mark.
private static final int IP_BLOCK_SLEEP_TIME = (int) Math.round((double) 60 / (RETRY_LIMIT - 1) * 60 * 1000);
/**
 * Creates a ripper for the given URL by delegating to the superclass constructor.
 *
 * @param url URL handed through to the superclass
 * @throws IOException declared here; presumably raised by the superclass constructor (confirm there)
 */
public ImagefapRipper(URL url) throws IOException {
super(url);
}
@ -97,7 +110,7 @@ public class ImagefapRipper extends AbstractHTMLRipper {
/**
 * Returns the first page of the album, loading it on first use and reusing
 * the stored document on later calls.
 *
 * @return the album's first page
 * @throws IOException if the page cannot be loaded or the rate-limit retries are exhausted
 */
@Override
public Document getFirstPage() throws IOException {
    if (albumDoc == null) {
        // Load exclusively through getPageWithRetries: a direct Http fetch here would
        // issue an extra request whose result is discarded and would skip the
        // block-page detection.
        albumDoc = getPageWithRetries(url);
    }
    return albumDoc;
}
@ -114,8 +127,13 @@ public class ImagefapRipper extends AbstractHTMLRipper {
if (nextURL == null) {
throw new IOException("No next page found");
}
sleep(1000);
return Http.url(nextURL).get();
// Sleep before fetching next page.
sleep(PAGE_SLEEP_TIME);
// Load next page
Document nextPage = getPageWithRetries(new URL(nextURL));
return nextPage;
}
@Override
@ -156,11 +174,79 @@ public class ImagefapRipper extends AbstractHTMLRipper {
/**
 * Resolves the full-size image address from an image page.
 * Waits IMAGE_SLEEP_TIME, loads the page through getPageWithRetries and reads
 * the {@code src} attribute of the {@code img#mainPhoto} element.
 *
 * @param pageURL address of the page that embeds the full-size image
 * @return the {@code src} attribute value, or {@code null} if the page could not be loaded
 */
private String getFullSizedImage(String pageURL) {
    try {
        // Sleep before fetching image.
        sleep(IMAGE_SLEEP_TIME);
        // Single, throttled fetch; the page is declared and loaded exactly once.
        Document doc = getPageWithRetries(new URL(pageURL));
        return doc.select("img#mainPhoto").attr("src");
    } catch (IOException e) {
        // Best effort: callers receive null, but the failure is no longer silent.
        LOGGER.warn("Unable to load image page " + pageURL + ": " + e.getMessage());
        return null;
    }
}
/**
 * Loads a page, waiting and retrying while the site answers with its
 * rate-limit block notice.
 * <p>
 * Every attempt sends a LOADING_RESOURCE status update, increments
 * {@code callsMade}, logs the remaining request budget and then fetches the
 * page. If the fetched document contains the block notice, the method sleeps
 * for IP_BLOCK_SLEEP_TIME and tries again, for at most RETRY_LIMIT retries.
 *
 * @param url page to load
 * @return the first fetched document that does not contain the block notice
 * @throws IOException if loading fails, if the retries are exhausted, or if the
 *                     thread is interrupted while waiting (the interrupt flag is
 *                     restored and the InterruptedException is attached as cause)
 */
private Document getPageWithRetries(URL url) throws IOException {
    int retries = RETRY_LIMIT;
    while (true) {
        sendUpdate(STATUS.LOADING_RESOURCE, url.toExternalForm());

        // For debugging the rate limit checker. Useful to track whether the timeout should be altered or not.
        callsMade++;
        checkRateLimit();

        LOGGER.info("Retrieving " + url);
        Document doc = Http.url(url).get();

        boolean blocked = doc.toString().contains("Your IP made too many requests to our servers and we need to check that you are a real human being");
        if (!blocked) {
            return doc;
        }
        if (retries == 0) {
            throw new IOException("Hit rate limit and maximum number of retries, giving up");
        }

        String message = "Hit rate limit while loading " + url + ", sleeping for " + IP_BLOCK_SLEEP_TIME + "ms, " + retries + " retries remaining";
        LOGGER.warn(message);
        sendUpdate(STATUS.DOWNLOAD_WARN, message);
        retries--;

        try {
            Thread.sleep(IP_BLOCK_SLEEP_TIME);
        } catch (InterruptedException e) {
            // Restore the interrupt flag so code further up the stack can still
            // observe the interruption, and keep the original exception as cause.
            Thread.currentThread().interrupt();
            throw new IOException("Interrupted while waiting for rate limit to subside", e);
        }
    }
}
/**
 * Debug aid for the rate limit issue: logs how many requests are left in the
 * current time window so the sleep times can be tuned to stay under the limit.
 * <p>
 * The windows are measured from {@code startTime}: the first minute, the first
 * five minutes and the first hour. Nothing is logged once an hour has elapsed.
 * {@code callsMade} is never reset, so the logged figures describe the first
 * window of each kind only.
 *
 * @return milliseconds elapsed since {@code startTime}
 */
private long checkRateLimit() {
    long duration = (System.nanoTime() - startTime) / 1000000;
    long elapsedSeconds = duration / 1000;

    // NOTE(review): 100 and 200 look like the site's per-minute and per-five-minute limits; confirm.
    int rateLimitMinute = 100;
    int rateLimitFiveMinutes = 200;

    if (elapsedSeconds < 60) {
        LOGGER.debug("Rate limit: " + (rateLimitMinute - callsMade) + " calls remaining for first minute mark.");
    } else if (elapsedSeconds < 300) {
        LOGGER.debug("Rate limit: " + (rateLimitFiveMinutes - callsMade) + " calls remaining for first 5 minute mark.");
    } else if (elapsedSeconds < 3600) {
        LOGGER.debug("Rate limit: " + (RATE_LIMIT_HOUR - callsMade) + " calls remaining for first hour mark.");
    }

    return duration;
}
}