mirror of
https://github.com/RipMeApp/ripme.git
synced 2025-08-04 21:07:38 +02:00
Merge pull request #1935 from IrvinLara9/danbooru-ripper
Added Danbooru ripper and tests
This commit is contained in:
@@ -0,0 +1,112 @@
|
|||||||
|
package com.rarchives.ripme.ripper.rippers;
|
||||||
|
|
||||||
|
import com.rarchives.ripme.ripper.AbstractJSONRipper;
|
||||||
|
import com.rarchives.ripme.utils.Http;
|
||||||
|
import com.rarchives.ripme.utils.Utils;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
import org.json.JSONArray;
|
||||||
|
import org.json.JSONObject;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.net.MalformedURLException;
|
||||||
|
import java.net.URI;
|
||||||
|
import java.net.URISyntaxException;
|
||||||
|
import java.net.URL;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
public class DanbooruRipper extends AbstractJSONRipper {
|
||||||
|
private static final Logger logger = Logger.getLogger(DanbooruRipper.class);
|
||||||
|
|
||||||
|
private static final String DOMAIN = "danbooru.donmai.us",
|
||||||
|
HOST = "danbooru";
|
||||||
|
|
||||||
|
private Pattern gidPattern = null;
|
||||||
|
|
||||||
|
private int currentPageNum = 1;
|
||||||
|
|
||||||
|
public DanbooruRipper(URL url) throws IOException {
|
||||||
|
super(url);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected String getDomain() {
|
||||||
|
return DOMAIN;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getHost() {
|
||||||
|
return HOST;
|
||||||
|
}
|
||||||
|
|
||||||
|
private String getPage(int num) throws MalformedURLException {
|
||||||
|
return "https://" + getDomain() + "/posts.json?page=" + num + "&tags=" + getTag(url);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected JSONObject getFirstPage() throws IOException {
|
||||||
|
String newCompatibleJSON = "{ resources:" + Http.url(getPage(1)).getJSONArray() + " }";
|
||||||
|
|
||||||
|
return new JSONObject(newCompatibleJSON);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected JSONObject getNextPage(JSONObject doc) throws IOException {
|
||||||
|
currentPageNum++;
|
||||||
|
|
||||||
|
JSONArray resourcesJSONArray = Http.url(getPage(currentPageNum)).getJSONArray();
|
||||||
|
|
||||||
|
int resourcesJSONArrayLength = resourcesJSONArray.length();
|
||||||
|
|
||||||
|
if (resourcesJSONArrayLength == 0) {
|
||||||
|
currentPageNum = 0;
|
||||||
|
throw new IOException("No more images in the next page");
|
||||||
|
}
|
||||||
|
|
||||||
|
String newCompatibleJSON = "{ resources:" + resourcesJSONArray + " }";
|
||||||
|
|
||||||
|
return new JSONObject(newCompatibleJSON);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected List<String> getURLsFromJSON(JSONObject json) {
|
||||||
|
List<String> res = new ArrayList<>(100);
|
||||||
|
JSONArray jsonArray = json.getJSONArray("resources");
|
||||||
|
for (int i = 0; i < jsonArray.length(); i++) {
|
||||||
|
if (jsonArray.getJSONObject(i).has("file_url")) {
|
||||||
|
res.add(jsonArray.getJSONObject(i).getString("file_url"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getGID(URL url) throws MalformedURLException {
|
||||||
|
try {
|
||||||
|
return Utils.filesystemSafe(new URI(getTag(url).replaceAll("([?&])tags=", "")).getPath());
|
||||||
|
} catch (URISyntaxException ex) {
|
||||||
|
logger.error(ex);
|
||||||
|
}
|
||||||
|
|
||||||
|
throw new MalformedURLException("Expected booru URL format: " + getDomain() + "/posts?tags=searchterm - got " + url + " instead");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void downloadURL(URL url, int index) {
|
||||||
|
addURLToDownload(url, getPrefix(index));
|
||||||
|
}
|
||||||
|
|
||||||
|
private String getTag(URL url) throws MalformedURLException {
|
||||||
|
gidPattern = Pattern.compile("https?://danbooru.donmai.us/(posts)?.*([?&]tags=([a-zA-Z0-9$_.+!*'(),%-]+))(&|(#.*)?$)");
|
||||||
|
Matcher m = gidPattern.matcher(url.toExternalForm());
|
||||||
|
|
||||||
|
if (m.matches()) {
|
||||||
|
return m.group(3);
|
||||||
|
}
|
||||||
|
|
||||||
|
throw new MalformedURLException("Expected danbooru URL format: " + getDomain() + "/posts?tags=searchterm - got " + url + " instead");
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@@ -9,6 +9,7 @@ import java.util.Map;
|
|||||||
import org.apache.commons.lang.ArrayUtils;
|
import org.apache.commons.lang.ArrayUtils;
|
||||||
import org.apache.commons.lang.StringUtils;
|
import org.apache.commons.lang.StringUtils;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
import org.json.JSONArray;
|
||||||
import org.json.JSONObject;
|
import org.json.JSONObject;
|
||||||
import org.jsoup.Connection;
|
import org.jsoup.Connection;
|
||||||
import org.jsoup.Connection.Method;
|
import org.jsoup.Connection.Method;
|
||||||
@@ -171,6 +172,12 @@ public class Http {
|
|||||||
return new JSONObject(jsonString);
|
return new JSONObject(jsonString);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public JSONArray getJSONArray() throws IOException {
|
||||||
|
ignoreContentType();
|
||||||
|
String jsonArray = response().body();
|
||||||
|
return new JSONArray(jsonArray);
|
||||||
|
}
|
||||||
|
|
||||||
public Response response() throws IOException {
|
public Response response() throws IOException {
|
||||||
Response response = null;
|
Response response = null;
|
||||||
IOException lastException = null;
|
IOException lastException = null;
|
||||||
|
@@ -2,6 +2,8 @@ package com.rarchives.ripme.tst.ripper.rippers;
|
|||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
import com.rarchives.ripme.ripper.rippers.BooruRipper;
|
import com.rarchives.ripme.ripper.rippers.BooruRipper;
|
||||||
import org.junit.jupiter.api.Assertions;
|
import org.junit.jupiter.api.Assertions;
|
||||||
@@ -10,14 +12,49 @@ import org.junit.jupiter.api.Test;
|
|||||||
public class BooruRipperTest extends RippersTest {
|
public class BooruRipperTest extends RippersTest {
|
||||||
@Test
|
@Test
|
||||||
public void testRip() throws IOException {
|
public void testRip() throws IOException {
|
||||||
BooruRipper ripper = new BooruRipper(new URL("http://xbooru.com/index.php?page=post&s=list&tags=furry"));
|
List<URL> passURLs = new ArrayList<>();
|
||||||
testRipper(ripper);
|
passURLs.add(new URL("https://xbooru.com/index.php?page=post&s=list&tags=furry"));
|
||||||
|
passURLs.add(new URL("https://gelbooru.com/index.php?page=post&s=list&tags=animal_ears"));
|
||||||
|
|
||||||
|
for (URL url : passURLs) {
|
||||||
|
BooruRipper ripper = new BooruRipper(url);
|
||||||
|
testRipper(ripper);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testGetGID() throws IOException {
|
public void testGetGID() throws IOException {
|
||||||
URL url = new URL("http://xbooru.com/index.php?page=post&s=list&tags=furry");
|
URL xbooruUrl = new URL("https://xbooru.com/index.php?page=post&s=list&tags=furry");
|
||||||
BooruRipper ripper = new BooruRipper(url);
|
URL gelbooruUrl = new URL("https://gelbooru.com/index.php?page=post&s=list&tags=animal_ears");
|
||||||
Assertions.assertEquals("furry", ripper.getGID(url));
|
|
||||||
|
BooruRipper xbooruRipper = new BooruRipper(xbooruUrl);
|
||||||
|
BooruRipper gelbooruRipper = new BooruRipper(gelbooruUrl);
|
||||||
|
|
||||||
|
Assertions.assertEquals("furry", xbooruRipper.getGID(xbooruUrl));
|
||||||
|
Assertions.assertEquals("animal_ears", gelbooruRipper.getGID(gelbooruUrl));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetDomain() throws IOException {
|
||||||
|
URL xbooruUrl = new URL("https://xbooru.com/index.php?page=post&s=list&tags=furry");
|
||||||
|
URL gelbooruUrl = new URL("https://gelbooru.com/index.php?page=post&s=list&tags=animal_ears");
|
||||||
|
|
||||||
|
BooruRipper xbooruRipper = new BooruRipper(xbooruUrl);
|
||||||
|
BooruRipper gelbooruRipper = new BooruRipper(gelbooruUrl);
|
||||||
|
|
||||||
|
Assertions.assertEquals("xbooru.com", xbooruRipper.getDomain());
|
||||||
|
Assertions.assertEquals("gelbooru.com", gelbooruRipper.getDomain());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetHost() throws IOException {
|
||||||
|
URL xbooruUrl = new URL("https://xbooru.com/index.php?page=post&s=list&tags=furry");
|
||||||
|
URL gelbooruUrl = new URL("https://gelbooru.com/index.php?page=post&s=list&tags=animal_ears");
|
||||||
|
|
||||||
|
BooruRipper xbooruRipper = new BooruRipper(xbooruUrl);
|
||||||
|
BooruRipper gelbooruRipper = new BooruRipper(gelbooruUrl);
|
||||||
|
|
||||||
|
Assertions.assertEquals("xbooru", xbooruRipper.getHost());
|
||||||
|
Assertions.assertEquals("gelbooru", gelbooruRipper.getHost());
|
||||||
}
|
}
|
||||||
}
|
}
|
@@ -0,0 +1,45 @@
|
|||||||
|
package com.rarchives.ripme.tst.ripper.rippers;
|
||||||
|
|
||||||
|
import com.rarchives.ripme.ripper.rippers.DanbooruRipper;
|
||||||
|
import org.junit.jupiter.api.Assertions;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.net.URL;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public class DanbooruRipperTest extends RippersTest {
|
||||||
|
@Test
|
||||||
|
public void testRip() throws IOException {
|
||||||
|
List<URL> passURLs = new ArrayList<>();
|
||||||
|
passURLs.add(new URL("https://danbooru.donmai.us/posts?tags=brown_necktie"));
|
||||||
|
passURLs.add(new URL("https://danbooru.donmai.us/posts?page=1&tags=pink_sweater_vest"));
|
||||||
|
|
||||||
|
for (URL url : passURLs) {
|
||||||
|
DanbooruRipper danbooruRipper = new DanbooruRipper(url);
|
||||||
|
testRipper(danbooruRipper);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetGID() throws IOException {
|
||||||
|
URL danBooruUrl = new URL("https://danbooru.donmai.us/posts?tags=brown_necktie");
|
||||||
|
URL danBooruUrl2 = new URL("https://danbooru.donmai.us/posts?page=1&tags=pink_sweater_vest");
|
||||||
|
|
||||||
|
DanbooruRipper danbooruRipper = new DanbooruRipper(danBooruUrl);
|
||||||
|
DanbooruRipper danbooruRipper2 = new DanbooruRipper(danBooruUrl2);
|
||||||
|
|
||||||
|
Assertions.assertEquals("brown_necktie", danbooruRipper.getGID(danBooruUrl));
|
||||||
|
Assertions.assertEquals("pink_sweater_vest", danbooruRipper2.getGID(danBooruUrl2));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetHost() throws IOException {
|
||||||
|
URL danBooruUrl = new URL("https://danbooru.donmai.us/posts?tags=brown_necktie");
|
||||||
|
|
||||||
|
DanbooruRipper danbooruRipper = new DanbooruRipper(danBooruUrl);
|
||||||
|
|
||||||
|
Assertions.assertEquals("danbooru", danbooruRipper.getHost());
|
||||||
|
}
|
||||||
|
}
|
Reference in New Issue
Block a user