diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/SankakuComplexRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/SankakuComplexRipper.java
index cbd922bb..038b8100 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/SankakuComplexRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/SankakuComplexRipper.java
@@ -43,7 +43,7 @@ public class SankakuComplexRipper extends AbstractHTMLRipper {
         Matcher m = p.matcher(url.toExternalForm());
         if (m.matches()) {
             try {
-                return URLDecoder.decode(m.group(1), "UTF-8");
+                return URLDecoder.decode(m.group(2), "UTF-8");
             } catch (UnsupportedEncodingException e) {
-                throw new MalformedURLException("Cannot decode tag name '" + m.group(1) + "'");
+                throw new MalformedURLException("Cannot decode tag name '" + m.group(2) + "'");
             }
@@ -68,31 +68,47 @@ public class SankakuComplexRipper extends AbstractHTMLRipper {
         List imageURLs = new ArrayList();
-        // Image URLs are basically thumbnail URLs with a different domain, a simple
-        // path replacement, and a ?xxxxxx post ID at the end (obtainable from the href)
-        for (Element thumbSpan : doc.select("div.content > div > span.thumb")) {
-            String postId = thumbSpan.attr("id").replaceAll("p", "");
-            Element thumb = thumbSpan.getElementsByTag("img").first();
-            String image = thumb.attr("abs:src")
-                .replace(".sankakucomplex.com/data/preview",
-                         "s.sankakucomplex.com/data") + "?" + postId;
-            imageURLs.add(image);
+        // Each thumbnail links to a post page; the full sized image URL is read from that page
+        for (Element postAnchor : doc.select("div.content > div > span.thumb > a")) {
+            String postLink = postAnchor.attr("href");
+            String postUrl = "https://chan.sankakucomplex.com" + postLink;
+            try {
+                // Log before the request so a failing page is still named in the log
+                logger.info("Checking page " + postUrl);
+                // Get the page the full sized image is on
+                Document subPage = Http.url(postUrl).get();
+                String imageSrc = subPage.select("div[id=post-content] > a > img").attr("src");
+                if (imageSrc.isEmpty()) {
+                    // attr() gives "" when nothing matched; do not queue a bare "https:"
+                    logger.warn("No image found on page " + postUrl);
+                    continue;
+                }
+                imageURLs.add("https:" + imageSrc);
+            } catch (IOException e) {
+                logger.warn("Error while loading page " + postLink, e);
+            }
         }
         return imageURLs;
     }
 
     @Override
     public void downloadURL(URL url, int index) {
-        // Mock up the URL of the post page based on the post ID at the end of the URL.
-        String postId = url.toExternalForm().replaceAll(".*\\?", "");
-        addURLToDownload(url, postId + "_", "", "", null);
+        // Pause before queueing each download (8000 is presumably milliseconds - confirm)
+        sleep(8000);
+        addURLToDownload(url, getPrefix(index));
     }
 
     @Override
     public Document getNextPage(Document doc) throws IOException {
         Element pagination = doc.select("div.pagination").first();
-        if (pagination.hasAttr("next-page-url")) {
-            return Http.url(pagination.attr("abs:next-page-url")).cookies(cookies).get();
-        } else {
-            return null;
+        // first() yields null when the page has no pagination block
+        if (pagination != null && pagination.hasAttr("next-page-url")) {
+            String nextPage = pagination.attr("abs:next-page-url");
+            // Only logged in users can see past page 25
+            // Trying to rip page 26 will throw a no images found error
+            if (!nextPage.contains("page=26")) {
+                logger.info("Getting next page: " + nextPage);
+                return Http.url(nextPage).cookies(cookies).get();
+            }
         }
+        throw new IOException("No more pages");
     }
 }