diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/DanbooruRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/DanbooruRipper.java
new file mode 100644
index 00000000..fb0bb233
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/DanbooruRipper.java
@@ -0,0 +1,147 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import com.rarchives.ripme.ripper.AbstractJSONRipper;
+import com.rarchives.ripme.utils.Http;
+import com.rarchives.ripme.utils.Utils;
+import org.json.JSONArray;
+import org.json.JSONObject;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Rips the files of the posts returned by a Danbooru tag search, paging
+ * through the site's {@code posts.json} endpoint.
+ */
+public class DanbooruRipper extends AbstractJSONRipper {
+    private static final String DOMAIN = "danbooru.donmai.us";
+    private static final String HOST = "danbooru";
+
+    /** Number of the first result page. */
+    private static final int FIRST_PAGE = 1;
+
+    /** Key under which a page's post array is wrapped before it is returned. */
+    private static final String RESOURCES_KEY = "resources";
+
+    /**
+     * Matches a Danbooru search URL; group 3 is the value of its {@code tags}
+     * parameter. Compiled once instead of on every call.
+     */
+    private static final Pattern TAG_PATTERN = Pattern.compile(
+            "https?://danbooru\\.donmai\\.us/(posts)?.*([?&]tags=([a-zA-Z0-9$_.+!*'(),%-]+))(&|(#.*)?$)");
+
+    /** Number of the result page the ripper is currently on. */
+    private int currentPageNum = FIRST_PAGE;
+
+    public DanbooruRipper(URL url) throws IOException {
+        super(url);
+    }
+
+    @Override
+    protected String getDomain() {
+        return DOMAIN;
+    }
+
+    @Override
+    public String getHost() {
+        return HOST;
+    }
+
+    /**
+     * Builds the API URL for one page of the current tag search.
+     *
+     * @param num number of the result page to request
+     * @return the {@code posts.json} URL for that page
+     * @throws MalformedURLException if the ripper's URL is not a tag search
+     */
+    private String getPage(int num) throws MalformedURLException {
+        return "https://" + getDomain() + "/posts.json?page=" + num + "&tags=" + getTag(url);
+    }
+
+    /**
+     * Wraps a page's post array in an object, because the paging methods
+     * must return a {@link JSONObject} while the API call yields an array.
+     */
+    private static JSONObject wrapResources(JSONArray posts) {
+        return new JSONObject().put(RESOURCES_KEY, posts);
+    }
+
+    /** Creates the exception reported for URLs that are not a usable tag search. */
+    private MalformedURLException unexpectedUrl(URL url) {
+        return new MalformedURLException("Expected danbooru URL format: " + getDomain() + "/posts?tags=searchterm - got " + url + " instead");
+    }
+
+    @Override
+    protected JSONObject getFirstPage() throws IOException {
+        // Restart paging so getNextPage() continues from the page fetched here.
+        currentPageNum = FIRST_PAGE;
+        return wrapResources(Http.url(getPage(currentPageNum)).getJSONArray());
+    }
+
+    @Override
+    protected JSONObject getNextPage(JSONObject doc) throws IOException {
+        JSONArray posts = Http.url(getPage(currentPageNum + 1)).getJSONArray();
+        if (posts.length() == 0) {
+            throw new IOException("No more images in the next page");
+        }
+
+        // Advance only after a non-empty page was actually received.
+        currentPageNum++;
+        return wrapResources(posts);
+    }
+
+    @Override
+    protected List<String> getURLsFromJSON(JSONObject json) {
+        JSONArray posts = json.getJSONArray(RESOURCES_KEY);
+        List<String> fileUrls = new ArrayList<>(posts.length());
+        for (int i = 0; i < posts.length(); i++) {
+            JSONObject post = posts.getJSONObject(i);
+            // Skip posts whose file_url is missing or JSON null.
+            if (!post.isNull("file_url")) {
+                fileUrls.add(post.getString("file_url"));
+            }
+        }
+        return fileUrls;
+    }
+
+    @Override
+    public String getGID(URL url) throws MalformedURLException {
+        String tag = getTag(url);
+        try {
+            // URI#getPath decodes percent-escapes in the tag.
+            return Utils.filesystemSafe(new URI(tag).getPath());
+        } catch (URISyntaxException ex) {
+            MalformedURLException malformed = unexpectedUrl(url);
+            malformed.initCause(ex);
+            throw malformed;
+        }
+    }
+
+    @Override
+    protected void downloadURL(URL url, int index) {
+        addURLToDownload(url, getPrefix(index));
+    }
+
+    /**
+     * Extracts the search tag from a Danbooru URL.
+     *
+     * @param url URL to inspect
+     * @return the value of the {@code tags} parameter as it appears in the URL
+     * @throws MalformedURLException if {@code url} is not a tag search
+     */
+    private String getTag(URL url) throws MalformedURLException {
+        Matcher m = TAG_PATTERN.matcher(url.toExternalForm());
+        if (m.matches()) {
+            return m.group(3);
+        }
+
+        throw unexpectedUrl(url);
+    }
+}
diff --git a/src/main/java/com/rarchives/ripme/utils/Http.java b/src/main/java/com/rarchives/ripme/utils/Http.java
index fe020041..0c76a768 100644
--- a/src/main/java/com/rarchives/ripme/utils/Http.java
+++ b/src/main/java/com/rarchives/ripme/utils/Http.java
@@ -4,6 +4,7 @@ import com.rarchives.ripme.ripper.AbstractRipper;
 import org.apache.commons.lang.ArrayUtils;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
+import org.json.JSONArray;
 import org.json.JSONObject;
 import org.jsoup.Connection;
 import org.jsoup.Connection.Method;
@@ -182,6 +183,18 @@ public class Http {
         return new JSONObject(jsonString);
     }
 
+    /**
+     * Reads the response body, ignoring its content type, and parses it as a JSON array.
+     *
+     * @return the array parsed from the response body
+     * @throws IOException if {@link #response()} fails
+     */
+    public JSONArray getJSONArray() throws IOException {
+        ignoreContentType();
+        String jsonString = response().body();
+        return new JSONArray(jsonString);
+    }
+
     public Response response() throws IOException {
         Response response;
         IOException lastException = null;
diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/BooruRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/BooruRipperTest.java
index 8fa2cfc0..f7918aad 100644
--- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/BooruRipperTest.java
+++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/BooruRipperTest.java
@@ -2,6 +2,8 @@ package com.rarchives.ripme.tst.ripper.rippers;
 
 import java.io.IOException;
 import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
 
 import com.rarchives.ripme.ripper.rippers.BooruRipper;
 import org.junit.jupiter.api.Assertions;
@@ -10,14 +12,49 @@ import org.junit.jupiter.api.Test;
 public class BooruRipperTest extends RippersTest {
     @Test
     public void testRip() throws IOException {
-        BooruRipper ripper = new BooruRipper(new URL("http://xbooru.com/index.php?page=post&s=list&tags=furry"));
-        testRipper(ripper);
+        List<URL> passURLs = new ArrayList<>();
+        passURLs.add(new URL("https://xbooru.com/index.php?page=post&s=list&tags=furry"));
+        passURLs.add(new URL("https://gelbooru.com/index.php?page=post&s=list&tags=animal_ears"));
+
+        for (URL url : passURLs) {
+            BooruRipper ripper = new BooruRipper(url);
+            testRipper(ripper);
+        }
     }
 
     @Test
     public void testGetGID() throws IOException {
-        URL url = new URL("http://xbooru.com/index.php?page=post&s=list&tags=furry");
-        BooruRipper ripper = new BooruRipper(url);
-        Assertions.assertEquals("furry", ripper.getGID(url));
+        URL xbooruUrl = new URL("https://xbooru.com/index.php?page=post&s=list&tags=furry");
+        URL gelbooruUrl = new URL("https://gelbooru.com/index.php?page=post&s=list&tags=animal_ears");
+
+        BooruRipper xbooruRipper = new BooruRipper(xbooruUrl);
+        BooruRipper gelbooruRipper = new BooruRipper(gelbooruUrl);
+
+        Assertions.assertEquals("furry", xbooruRipper.getGID(xbooruUrl));
+        Assertions.assertEquals("animal_ears", gelbooruRipper.getGID(gelbooruUrl));
+    }
+
+    @Test
+    public void testGetDomain() throws IOException {
+        URL xbooruUrl = new URL("https://xbooru.com/index.php?page=post&s=list&tags=furry");
+        URL gelbooruUrl = new URL("https://gelbooru.com/index.php?page=post&s=list&tags=animal_ears");
+
+        BooruRipper xbooruRipper = new BooruRipper(xbooruUrl);
+        BooruRipper gelbooruRipper = new BooruRipper(gelbooruUrl);
+
+        Assertions.assertEquals("xbooru.com", xbooruRipper.getDomain());
+        Assertions.assertEquals("gelbooru.com", gelbooruRipper.getDomain());
+    }
+
+    @Test
+    public void testGetHost() throws IOException {
+        URL xbooruUrl = new URL("https://xbooru.com/index.php?page=post&s=list&tags=furry");
+        URL gelbooruUrl = new URL("https://gelbooru.com/index.php?page=post&s=list&tags=animal_ears");
+
+        BooruRipper xbooruRipper = new BooruRipper(xbooruUrl);
+        BooruRipper gelbooruRipper = new BooruRipper(gelbooruUrl);
+
+        Assertions.assertEquals("xbooru", xbooruRipper.getHost());
+        Assertions.assertEquals("gelbooru", gelbooruRipper.getHost());
     }
 }
\ No newline at end of file
diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/DanbooruRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/DanbooruRipperTest.java
new file mode 100644
index 00000000..575864a5
--- /dev/null
+++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/DanbooruRipperTest.java
@@ -0,0 +1,45 @@
+package com.rarchives.ripme.tst.ripper.rippers;
+
+import com.rarchives.ripme.ripper.rippers.DanbooruRipper;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import java.io.IOException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+
+public class DanbooruRipperTest extends RippersTest {
+    @Test
+    public void testRip() throws IOException {
+        List<URL> passURLs = new ArrayList<>();
+        passURLs.add(new URL("https://danbooru.donmai.us/posts?tags=brown_necktie"));
+        passURLs.add(new URL("https://danbooru.donmai.us/posts?page=1&tags=pink_sweater_vest"));
+
+        for (URL url : passURLs) {
+            DanbooruRipper danbooruRipper = new DanbooruRipper(url);
+            testRipper(danbooruRipper);
+        }
+    }
+
+    @Test
+    public void testGetGID() throws IOException {
+        URL danBooruUrl = new URL("https://danbooru.donmai.us/posts?tags=brown_necktie");
+        URL danBooruUrl2 = new URL("https://danbooru.donmai.us/posts?page=1&tags=pink_sweater_vest");
+
+        DanbooruRipper danbooruRipper = new DanbooruRipper(danBooruUrl);
+        DanbooruRipper danbooruRipper2 = new DanbooruRipper(danBooruUrl2);
+
+        Assertions.assertEquals("brown_necktie", danbooruRipper.getGID(danBooruUrl));
+        Assertions.assertEquals("pink_sweater_vest", danbooruRipper2.getGID(danBooruUrl2));
+    }
+
+    @Test
+    public void testGetHost() throws IOException {
+        URL danBooruUrl = new URL("https://danbooru.donmai.us/posts?tags=brown_necktie");
+
+        DanbooruRipper danbooruRipper = new DanbooruRipper(danBooruUrl);
+
+        Assertions.assertEquals("danbooru", danbooruRipper.getHost());
+    }
+}