From 70799e39b978c33069c3e7afb8f08965103a835d Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Fri, 14 Sep 2018 13:44:23 -0400 Subject: [PATCH 1/4] refactored ehentai tag blacklisting --- .../ripme/ripper/rippers/EHentaiRipper.java | 30 ++----------------- 1 file changed, 3 insertions(+), 27 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/EHentaiRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/EHentaiRipper.java index 69d778cf..54f449a1 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/EHentaiRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/EHentaiRipper.java @@ -12,6 +12,7 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; import com.rarchives.ripme.ui.RipStatusMessage; +import com.rarchives.ripme.utils.RipUtils; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; @@ -126,32 +127,7 @@ public class EHentaiRipper extends AbstractHTMLRipper { } } - /** - * Checks for blacklisted tags on page. If it finds one it returns it, if not it return null - * - * @param doc - * @return String - */ - public String checkTags(Document doc, String[] blackListedTags) { - // If the user hasn't blacklisted any tags we return null; - if (blackListedTags == null) { - return null; - } - LOGGER.info("Blacklisted tags " + blackListedTags[0]); - List tagsOnPage = getTags(doc); - for (String tag : blackListedTags) { - for (String pageTag : tagsOnPage) { - // We replace all dashes in the tag with spaces because the tags we get from the site are separated using - // dashes - if (tag.trim().toLowerCase().equals(pageTag.toLowerCase())) { - return tag; - } - } - } - return null; - } - - private List getTags(Document doc) { + public List getTags(Document doc) { List tags = new ArrayList<>(); LOGGER.info("Getting tags"); for (Element tag : doc.select("td > div > a")) { @@ -169,7 +145,7 @@ public class EHentaiRipper extends AbstractHTMLRipper { } this.lastURL = this.url.toExternalForm(); LOGGER.info("Checking blacklist"); - String blacklistedTag = checkTags(albumDoc, Utils.getConfigStringArray("ehentai.blacklist.tags")); + String blacklistedTag = RipUtils.checkTags(Utils.getConfigStringArray("ehentai.blacklist.tags"), getTags(albumDoc)); if (blacklistedTag != null) { sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_WARN, "Skipping " + url.toExternalForm() + " as it " + "contains the blacklisted tag \"" + blacklistedTag + "\""); From 1ecef7356a9df49f51d5662ed4746ccdc7f4a917 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Fri, 14 Sep 2018 13:44:47 -0400 Subject: [PATCH 2/4] refactored ehentai unit tests --- .../ripme/tst/ripper/rippers/EhentaiRipperTest.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/EhentaiRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/EhentaiRipperTest.java index cdab6b73..8e87f8a5 100644 --- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/EhentaiRipperTest.java +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/EhentaiRipperTest.java @@ -2,8 +2,10 @@ package com.rarchives.ripme.tst.ripper.rippers; import java.io.IOException; import java.net.URL; +import java.util.List; import com.rarchives.ripme.ripper.rippers.EHentaiRipper; +import com.rarchives.ripme.utils.RipUtils; public class EhentaiRipperTest extends RippersTest { public void testEHentaiAlbum() throws IOException { @@ -15,14 +17,15 @@ public class EhentaiRipperTest extends RippersTest { public void testTagBlackList() throws IOException { URL url = new URL("https://e-hentai.org/g/1228503/1a2f455f96/"); EHentaiRipper ripper = new EHentaiRipper(url); + List tagsOnPage = ripper.getTags(ripper.getFirstPage()); // Test multiple blacklisted tags String[] tags = {"test", "one", "yuri"}; - String blacklistedTag = ripper.checkTags(ripper.getFirstPage(), tags); + String blacklistedTag = RipUtils.checkTags(tags, tagsOnPage); assertEquals("yuri", blacklistedTag); // test tags with spaces in them String[] tags2 = {"test", "one", "midnight on mars"}; - blacklistedTag = ripper.checkTags(ripper.getFirstPage(), tags2); + blacklistedTag = RipUtils.checkTags(tags2, tagsOnPage); assertEquals("midnight on mars", blacklistedTag); } } \ No newline at end of file From e7df21b07f60be7edd7c1336bb11da9727e145bd Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Fri, 14 Sep 2018 13:52:13 -0400 Subject: [PATCH 3/4] refactored nhentai tag blacklisting --- .../ripme/ripper/rippers/NhentaiRipper.java | 34 ++++--------------- 1 file changed, 6 insertions(+), 28 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/NhentaiRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/NhentaiRipper.java index daef205e..49fc1d8a 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/NhentaiRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/NhentaiRipper.java @@ -4,6 +4,7 @@ import com.rarchives.ripme.ripper.AbstractHTMLRipper; import com.rarchives.ripme.ripper.DownloadThreadPool; import com.rarchives.ripme.ui.RipStatusMessage; import com.rarchives.ripme.utils.Http; +import com.rarchives.ripme.utils.RipUtils; import com.rarchives.ripme.utils.Utils; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; @@ -82,39 +83,16 @@ public class NhentaiRipper extends AbstractHTMLRipper { return "nhentai" + title; } - private List getTags(Document doc) { + public List getTags(Document doc) { List tags = new ArrayList<>(); for (Element tag : doc.select("a.tag")) { - tags.add(tag.attr("href").replaceAll("/tag/", "").replaceAll("/", "")); + String tagString = tag.attr("href").replaceAll("/tag/", "").replaceAll("/", ""); + LOGGER.info("Found tag: " + tagString); + tags.add(tagString); } return tags; } - /** - * Checks for blacklisted tags on page. If it finds one it returns it, if not it return null - * - * @param doc - * @return String - */ - public String checkTags(Document doc, String[] blackListedTags) { - // If the user hasn't blacklisted any tags we return false; - if (blackListedTags == null) { - return null; - } - LOGGER.info("Blacklisted tags " + blackListedTags[0]); - List tagsOnPage = getTags(doc); - for (String tag : blackListedTags) { - for (String pageTag : tagsOnPage) { - // We replace all dashes in the tag with spaces because the tags we get from the site are separated using - // dashes - if (tag.trim().toLowerCase().equals(pageTag.replaceAll("-", " ").toLowerCase())) { - return tag; - } - } - } - return null; - } - @Override public String getGID(URL url) throws MalformedURLException { // Ex: https://nhentai.net/g/159174/ @@ -134,7 +112,7 @@ public class NhentaiRipper extends AbstractHTMLRipper { firstPage = Http.url(url).get(); } - String blacklistedTag = checkTags(firstPage, Utils.getConfigStringArray("nhentai.blacklist.tags")); + String blacklistedTag = RipUtils.checkTags(Utils.getConfigStringArray("nhentai.blacklist.tags"), getTags(firstPage)); if (blacklistedTag != null) { sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_WARN, "Skipping " + url.toExternalForm() + " as it " + "contains the blacklisted tag \"" + blacklistedTag + "\""); From ab17ed26cefb5a7a29daf17f6ee3096f59bb0063 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Fri, 14 Sep 2018 13:52:27 -0400 Subject: [PATCH 4/4] refactored nhentai unit tests --- .../ripme/tst/ripper/rippers/NhentaiRipperTest.java | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/NhentaiRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/NhentaiRipperTest.java index 108feed2..f6418ef7 100644 --- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/NhentaiRipperTest.java +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/NhentaiRipperTest.java @@ -2,8 +2,10 @@ package com.rarchives.ripme.tst.ripper.rippers; import java.io.IOException; import java.net.URL; +import java.util.List; import com.rarchives.ripme.ripper.rippers.NhentaiRipper; +import com.rarchives.ripme.utils.RipUtils; public class NhentaiRipperTest extends RippersTest { public void testRip() throws IOException { @@ -20,14 +22,15 @@ public class NhentaiRipperTest extends RippersTest { public void testTagBlackList() throws IOException { URL url = new URL("https://nhentai.net/g/233295/"); NhentaiRipper ripper = new NhentaiRipper(url); + List tagsOnPage = ripper.getTags(ripper.getFirstPage()); // Test multiple blacklisted tags String[] tags = {"test", "one", "blowjob"}; - String blacklistedTag = ripper.checkTags(ripper.getFirstPage(), tags); + String blacklistedTag = RipUtils.checkTags(tags, tagsOnPage); assertEquals("blowjob", blacklistedTag); // test tags with spaces in them - String[] tags2 = {"test", "one", "sole female"}; - blacklistedTag = ripper.checkTags(ripper.getFirstPage(), tags2); - assertEquals("sole female", blacklistedTag); + String[] tags2 = {"test", "one", "sole-female"}; + blacklistedTag = RipUtils.checkTags(tags2, tagsOnPage); + assertEquals("sole-female", blacklistedTag); } }