1
0
mirror of https://github.com/RipMeApp/ripme.git synced 2025-08-26 07:14:38 +02:00

Updating Jab Archives ripper to add image title

The ripper now includes each image's title in the saved filename when writing
the downloaded image files, which avoids duplicate files and naming conflicts
when the same gallery is downloaded multiple times.
This commit is contained in:
0x1f595
2018-12-28 16:38:02 -07:00
parent 58eaa818e4
commit 45fb6dc4e8

View File

@@ -7,6 +7,12 @@ import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Locale;
import java.text.Normalizer;
import java.text.Normalizer.Form;
import com.rarchives.ripme.ripper.AbstractHTMLRipper;
import com.rarchives.ripme.utils.Http;
@@ -17,6 +23,11 @@ import org.jsoup.select.Elements;
public class JabArchivesRipper extends AbstractHTMLRipper {
// Matches any single character that is neither a word character (\w) nor a hyphen;
// getSlug uses it to strip everything else from a slug.
private static final Pattern NONLATIN = Pattern.compile("[^\\w-]");
// Matches a single whitespace character; getSlug replaces each match with a hyphen.
private static final Pattern WHITESPACE = Pattern.compile("[\\s]");
// Maps an image URL (as a string) to the filename prefix built from that image's title.
// Written by getURLsFromPage and read by downloadURL; wrapped in a synchronized map.
private Map<String, String> itemPrefixes = Collections.synchronizedMap(new HashMap<String, String>());
/**
 * Creates a ripper for the given URL by delegating to the superclass constructor.
 *
 * @param url the URL handed to the superclass constructor
 * @throws IOException declared on this constructor; presumably propagated from the
 *         superclass constructor (not visible here, confirm against AbstractHTMLRipper)
 */
public JabArchivesRipper(URL url) throws IOException {
super(url);
}
@@ -62,17 +73,35 @@ public class JabArchivesRipper extends AbstractHTMLRipper {
return Http.url(nextUrl).get();
}
/**
 * Builds a lower-case, URL- and filename-safe slug from arbitrary text.
 *
 * @param input the text to convert
 * @return the slug: every whitespace character replaced by a hyphen, the text
 *         NFD-normalized, every character matched by NONLATIN removed, and the
 *         remainder lower-cased using the English locale
 */
protected String getSlug(String input) {
    // Hyphenate whitespace first so word boundaries survive the stripping step.
    final String hyphenated = WHITESPACE.matcher(input).replaceAll("-");
    // NFD splits accented letters into a base letter followed by combining marks,
    // so the base letter can be kept while the marks are stripped below.
    final String decomposed = Normalizer.normalize(hyphenated, Form.NFD);
    return NONLATIN.matcher(decomposed).replaceAll("").toLowerCase(Locale.ENGLISH);
}
/**
 * Collects the large-image URLs from a gallery page and records a title-based
 * filename prefix for each one.
 *
 * For every {@code img} under {@code #contentMain}, the URL is the site host plus
 * the image's {@code src} with "thumb" replaced by "large". The prefix is the slug
 * of the parent element's {@code title} attribute followed by an underscore, stored
 * in {@code itemPrefixes} keyed by the URL.
 *
 * @param doc the parsed gallery page
 * @return the image URLs in the order the images were selected, one entry per image
 */
@Override
public List<String> getURLsFromPage(Document doc) {
    List<String> result = new ArrayList<String>();
    for (Element el : doc.select("#contentMain img")) {
        String url = "https://jabarchives.com" + el.attr("src").replace("thumb", "large");
        // Add each image exactly once; a second add of the same URL would list
        // every image twice in the result.
        result.add(url);
        String title = el.parent().attr("title");
        itemPrefixes.put(url, getSlug(title) + "_");
    }
    return result;
}
/**
 * Queues a single URL for download, using the prefix recorded for it in
 * {@code itemPrefixes} as the filename prefix.
 *
 * When no prefix was recorded for the URL, an empty prefix is used.
 *
 * @param url   the URL to download
 * @param index the position of the URL in the rip; not used when choosing the prefix
 */
@Override
public void downloadURL(URL url, int index) {
    // A single get() replaces the containsKey()/get() pair, so the shared map is
    // consulted once rather than in a check-then-act sequence.
    String prefix = itemPrefixes.get(url.toString());
    if (prefix == null) {
        prefix = "";
    }
    // Queue the URL exactly once; queuing it again with a different prefix would
    // request the same image twice.
    addURLToDownload(url, prefix);
}
}