From 45fb6dc4e883e4eca5ea0c8f7df75f756c60bf2c Mon Sep 17 00:00:00 2001
From: 0x1f595 <0x1f595@users.noreply.github.com>
Date: Fri, 28 Dec 2018 16:38:02 -0700
Subject: [PATCH] Updating Jab Archives ripper to add image title

This will now include the image title in the saved filename when writing
the final text files, which fixes duplicate files and naming conflicts
from downloading the same gallery multiple times.
---
 .../ripper/rippers/JabArchivesRipper.java     | 33 +++++++++++++++++--
 1 file changed, 31 insertions(+), 2 deletions(-)

diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/JabArchivesRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/JabArchivesRipper.java
index 90f274f1..11c5138a 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/JabArchivesRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/JabArchivesRipper.java
@@ -7,6 +7,12 @@ import java.util.ArrayList;
 import java.util.List;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Locale;
+import java.text.Normalizer;
+import java.text.Normalizer.Form;
 
 import com.rarchives.ripme.ripper.AbstractHTMLRipper;
 import com.rarchives.ripme.utils.Http;
@@ -17,6 +23,11 @@ import org.jsoup.select.Elements;
 
 public class JabArchivesRipper extends AbstractHTMLRipper {
 
+    private static final Pattern NONLATIN = Pattern.compile("[^\\w-]");
+    private static final Pattern WHITESPACE = Pattern.compile("[\\s]");
+
+    private Map<String, String> itemPrefixes = Collections.synchronizedMap(new HashMap<String, String>());
+
     public JabArchivesRipper(URL url) throws IOException {
         super(url);
     }
@@ -62,17 +73,35 @@ public class JabArchivesRipper extends AbstractHTMLRipper {
         return Http.url(nextUrl).get();
     }
 
+    protected String getSlug(String input) {
+        // Get a URL/file-safe version of a string
+        String nowhitespace = WHITESPACE.matcher(input).replaceAll("-");
+        String normalized = Normalizer.normalize(nowhitespace, Form.NFD);
+        String slug = NONLATIN.matcher(normalized).replaceAll("");
+        return slug.toLowerCase(Locale.ENGLISH);
+    }
+
     @Override
     public List<String> getURLsFromPage(Document doc) {
         List<String> result = new ArrayList<String>();
         for (Element el : doc.select("#contentMain img")) {
-            result.add("https://jabarchives.com" + el.attr("src").replace("thumb", "large"));
+            String url = "https://jabarchives.com" + el.attr("src").replace("thumb", "large");
+            result.add(url);
+
+            String title = el.parent().attr("title");
+            itemPrefixes.put(url, getSlug(title) + "_");
         }
         return result;
     }
 
     @Override
     public void downloadURL(URL url, int index) {
-        addURLToDownload(url, getPrefix(index));
+        String prefix = "";
+        if (itemPrefixes.containsKey(url.toString())) {
+            System.out.println("Found matching prefix:");
+            prefix = itemPrefixes.get(url.toString());
+            System.out.println(prefix);
+        }
+        addURLToDownload(url, prefix);
     }
 }