From 26e27a9ed546ed9417107f98ea4163f3023a2a04 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Thu, 13 Sep 2018 07:17:55 -0400 Subject: [PATCH 1/3] Added ability to blacklist tags on tsumino --- .../ripme/ripper/rippers/TsuminoRipper.java | 46 ++++++++++++++++++- .../tst/ripper/rippers/TsuminoRipperTest.java | 17 ++++++- 2 files changed, 60 insertions(+), 3 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/TsuminoRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/TsuminoRipper.java index 4886503a..8855ec16 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/TsuminoRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/TsuminoRipper.java @@ -12,6 +12,7 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; import com.rarchives.ripme.ui.RipStatusMessage; +import com.rarchives.ripme.utils.Utils; import org.json.JSONArray; import org.json.JSONObject; import org.jsoup.Connection; @@ -21,6 +22,7 @@ import org.jsoup.nodes.Document; import com.rarchives.ripme.ripper.AbstractHTMLRipper; import com.rarchives.ripme.utils.Http; +import org.jsoup.nodes.Element; public class TsuminoRipper extends AbstractHTMLRipper { private Map cookies = new HashMap<>(); @@ -29,6 +31,41 @@ public class TsuminoRipper extends AbstractHTMLRipper { super(url); } + private List getTags(Document doc) { + List tags = new ArrayList<>(); + LOGGER.info("Getting tags"); + for (Element tag : doc.select("div#Tag > a")) { + LOGGER.info("Found tag " + tag.text()); + tags.add(tag.text().toLowerCase()); + } + return tags; + } + + /** + * Checks for blacklisted tags on page. 
If it finds one it returns it, if not it return null + * + * @param doc + * @return String + */ + public String checkTags(Document doc, String[] blackListedTags) { + // If the user hasn't blacklisted any tags we return null; + if (blackListedTags == null) { + return null; + } + LOGGER.info("Blacklisted tags " + blackListedTags[0]); + List tagsOnPage = getTags(doc); + for (String tag : blackListedTags) { + for (String pageTag : tagsOnPage) { + // We replace all dashes in the tag with spaces because the tags we get from the site are separated using + // dashes + if (tag.trim().toLowerCase().equals(pageTag.toLowerCase())) { + return tag.toLowerCase(); + } + } + } + return null; + } + private JSONArray getPageUrls() { String postURL = "http://www.tsumino.com/Read/Load"; try { @@ -86,7 +123,14 @@ public class TsuminoRipper extends AbstractHTMLRipper { public Document getFirstPage() throws IOException { Connection.Response resp = Http.url(url).response(); cookies.putAll(resp.cookies()); - return resp.parse(); + Document doc = resp.parse(); + String blacklistedTag = checkTags(doc, Utils.getConfigStringArray("tsumino.blacklist.tags")); + if (blacklistedTag != null) { + sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_WARN, "Skipping " + url.toExternalForm() + " as it " + + "contains the blacklisted tag \"" + blacklistedTag + "\""); + return null; + } + return doc; } @Override diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/TsuminoRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/TsuminoRipperTest.java index 469c6810..889a43b4 100644 --- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/TsuminoRipperTest.java +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/TsuminoRipperTest.java @@ -2,13 +2,26 @@ package com.rarchives.ripme.tst.ripper.rippers; import java.io.IOException; import java.net.URL; +import java.util.List; import com.rarchives.ripme.ripper.rippers.TsuminoRipper; public class TsuminoRipperTest extends RippersTest { - 
public void testPahealRipper() throws IOException { - // a photo set + public void testTsuminoRipper() throws IOException { TsuminoRipper ripper = new TsuminoRipper(new URL("http://www.tsumino.com/Book/Info/42882/chaldea-maid-")); testRipper(ripper); } + + public void testTagBlackList() throws IOException { + TsuminoRipper ripper = new TsuminoRipper(new URL("http://www.tsumino.com/Book/Info/42882/chaldea-maid-")); + String[] tags1 = {"test", "one", "Blowjob"}; + String blacklistedTag = ripper.checkTags(ripper.getFirstPage(), tags1); + assertEquals("blowjob", blacklistedTag); + + // Test a tag with spaces + String[] tags2 = {"test", "one", "Full Color"}; + blacklistedTag = ripper.checkTags(ripper.getFirstPage(), tags2); + assertEquals("full color", blacklistedTag); + + } } \ No newline at end of file From d945a51c4d4881998ca4bc1bb7c7d0f54a2ab9f5 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Thu, 13 Sep 2018 07:27:27 -0400 Subject: [PATCH 2/3] refactored code to cut down on repeat code --- .../ripme/ripper/rippers/TsuminoRipper.java | 30 ++----------------- .../com/rarchives/ripme/utils/RipUtils.java | 24 +++++++++++++++ 2 files changed, 27 insertions(+), 27 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/TsuminoRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/TsuminoRipper.java index 8855ec16..529b08d9 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/TsuminoRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/TsuminoRipper.java @@ -12,6 +12,7 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; import com.rarchives.ripme.ui.RipStatusMessage; +import com.rarchives.ripme.utils.RipUtils; import com.rarchives.ripme.utils.Utils; import org.json.JSONArray; import org.json.JSONObject; @@ -31,7 +32,7 @@ public class TsuminoRipper extends AbstractHTMLRipper { super(url); } - private List getTags(Document doc) { + public List getTags(Document doc) { List tags = new ArrayList<>(); 
LOGGER.info("Getting tags"); for (Element tag : doc.select("div#Tag > a")) { @@ -41,31 +42,6 @@ public class TsuminoRipper extends AbstractHTMLRipper { return tags; } - /** - * Checks for blacklisted tags on page. If it finds one it returns it, if not it return null - * - * @param doc - * @return String - */ - public String checkTags(Document doc, String[] blackListedTags) { - // If the user hasn't blacklisted any tags we return null; - if (blackListedTags == null) { - return null; - } - LOGGER.info("Blacklisted tags " + blackListedTags[0]); - List tagsOnPage = getTags(doc); - for (String tag : blackListedTags) { - for (String pageTag : tagsOnPage) { - // We replace all dashes in the tag with spaces because the tags we get from the site are separated using - // dashes - if (tag.trim().toLowerCase().equals(pageTag.toLowerCase())) { - return tag.toLowerCase(); - } - } - } - return null; - } - private JSONArray getPageUrls() { String postURL = "http://www.tsumino.com/Read/Load"; try { @@ -124,7 +100,7 @@ public class TsuminoRipper extends AbstractHTMLRipper { Connection.Response resp = Http.url(url).response(); cookies.putAll(resp.cookies()); Document doc = resp.parse(); - String blacklistedTag = checkTags(doc, Utils.getConfigStringArray("tsumino.blacklist.tags")); + String blacklistedTag = RipUtils.checkTags(Utils.getConfigStringArray("tsumino.blacklist.tags"), getTags(doc)); if (blacklistedTag != null) { sendUpdate(RipStatusMessage.STATUS.DOWNLOAD_WARN, "Skipping " + url.toExternalForm() + " as it " + "contains the blacklisted tag \"" + blacklistedTag + "\""); diff --git a/src/main/java/com/rarchives/ripme/utils/RipUtils.java b/src/main/java/com/rarchives/ripme/utils/RipUtils.java index f0752c06..5dea166b 100644 --- a/src/main/java/com/rarchives/ripme/utils/RipUtils.java +++ b/src/main/java/com/rarchives/ripme/utils/RipUtils.java @@ -305,4 +305,28 @@ public class RipUtils { } return cookies; } + + /** + * Checks for blacklisted tags on page. 
If it finds one it returns it; if not, it returns null + * + * @param blackListedTags a string array of the blacklisted tags + * @param tagsOnPage the tags on the page + * @return String + */ + public static String checkTags(String[] blackListedTags, List tagsOnPage) { + // If the user hasn't blacklisted any tags we return null; + if (blackListedTags == null) { + return null; + } + for (String tag : blackListedTags) { + for (String pageTag : tagsOnPage) { + // Compare case-insensitively, ignoring whitespace around the blacklisted tag; no other normalization (such as + // replacing dashes with spaces) is performed + if (tag.trim().toLowerCase().equals(pageTag.toLowerCase())) { + return tag.toLowerCase(); + } + } + } + return null; + } } From f8bce9e28851cf09e750242c7eea097f1c198691 Mon Sep 17 00:00:00 2001 From: cyian-1756 Date: Thu, 13 Sep 2018 07:27:39 -0400 Subject: [PATCH 3/3] Added more unit tests --- .../tst/ripper/rippers/TsuminoRipperTest.java | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/TsuminoRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/TsuminoRipperTest.java index 889a43b4..7d403776 100644 --- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/TsuminoRipperTest.java +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/TsuminoRipperTest.java @@ -5,6 +5,9 @@ import java.net.URL; import java.util.List; import com.rarchives.ripme.ripper.rippers.TsuminoRipper; +import com.rarchives.ripme.utils.RipUtils; +import org.jsoup.nodes.Document; + public class TsuminoRipperTest extends RippersTest { public void testTsuminoRipper() throws IOException { @@ -14,14 +17,21 @@ public class TsuminoRipperTest extends RippersTest { public void testTagBlackList() throws IOException { TsuminoRipper ripper = new TsuminoRipper(new URL("http://www.tsumino.com/Book/Info/42882/chaldea-maid-")); + Document doc = ripper.getFirstPage(); + List tagsOnPage = ripper.getTags(doc); String[] tags1 = {"test", 
"one", "Blowjob"}; - String blacklistedTag = ripper.checkTags(ripper.getFirstPage(), tags1); + String blacklistedTag = RipUtils.checkTags(tags1, tagsOnPage); assertEquals("blowjob", blacklistedTag); // Test a tag with spaces String[] tags2 = {"test", "one", "Full Color"}; - blacklistedTag = ripper.checkTags(ripper.getFirstPage(), tags2); + blacklistedTag = RipUtils.checkTags(tags2, tagsOnPage); assertEquals("full color", blacklistedTag); + // Test an album with no blacklisted tags + String[] tags3 = {"nothing", "one", "null"}; + blacklistedTag = RipUtils.checkTags(tags3, tagsOnPage); + assertNull(blacklistedTag); + } } \ No newline at end of file