diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ListalRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ListalRipper.java index 71e9e79d..8986fd91 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/ListalRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ListalRipper.java @@ -11,17 +11,25 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; import com.rarchives.ripme.ripper.AbstractHTMLRipper; import com.rarchives.ripme.ripper.DownloadThreadPool; import com.rarchives.ripme.utils.Http; +/** + * @author Tushar + * + */ public class ListalRipper extends AbstractHTMLRipper { - private Pattern p = Pattern.compile("https://www.listal.com/list/([a-zA-Z0-9-]+)"); + private Pattern p1 = Pattern.compile("https:\\/\\/www.listal.com\\/list\\/([a-zA-Z0-9-]+)"); + private Pattern p2 = + Pattern.compile("https:\\/\\/www.listal.com\\/((?:(?:[a-zA-Z0-9-]+)\\/?)+)"); private String listId = null; // listId to get more images via POST. private String postUrl = "https://www.listal.com/item-list/"; //to load more images. + private UrlType urlType = UrlType.UNKNOWN; private DownloadThreadPool listalThreadPool = new DownloadThreadPool("listalThreadPool"); @@ -42,21 +50,22 @@ public class ListalRipper extends AbstractHTMLRipper { @Override public Document getFirstPage() throws IOException { Document doc = Http.url(url).get(); - listId = doc.select("#customlistitems").first().attr("data-listid"); + if (urlType == UrlType.LIST) { + listId = doc.select("#customlistitems").first().attr("data-listid"); // Sent as "listid" when getNextPage POSTs for more items (LIST urls only). 
+ } return doc; } @Override public List getURLsFromPage(Document page) { - Matcher m = p.matcher(url.toExternalForm()); - if (m.matches()) { - // for url of type https://www.listal.com/list/my-list - return getURLsFromList(page); - } else { - // for url of type https://www.listal.com/jim-carrey/pictures - //TODO need to write - return null; + if (urlType == UrlType.LIST) { + // for url of type LIST, https://www.listal.com/list/my-list + return getURLsForListType(page); + } else if (urlType == UrlType.FOLDER) { + // for url of type FOLDER, https://www.listal.com/jim-carrey/pictures + return getURLsForFolderType(page); } + return null; } @Override @@ -66,54 +75,111 @@ public class ListalRipper extends AbstractHTMLRipper { @Override public String getGID(URL url) throws MalformedURLException { - Pattern p = Pattern.compile("https://www.listal.com/list/([a-zA-Z0-9-]+)"); - Matcher m = p.matcher(url.toExternalForm()); - if (m.matches()) { + Matcher m1 = p1.matcher(url.toExternalForm()); + if (m1.matches()) { // Return the text contained between () in the regex - return m.group(1); + urlType = UrlType.LIST; + return m1.group(1); + } + + Matcher m2 = p2.matcher(url.toExternalForm()); + if (m2.matches()) { + // Return only gid from capturing group of type listal.com/tvOrSomething/dexter/pictures + urlType = UrlType.FOLDER; + return getFolderTypeGid(m2.group(1)); } - //TODO match /../celebrity_name/images throw new MalformedURLException("Expected listal.com URL format: " + "listal.com/list/my-list-name - got " + url + " instead."); } @Override - public Document getNextPage(Document doc) throws IOException { - // TODO Auto-generated method stub - return super.getNextPage(doc); + public Document getNextPage(Document page) throws IOException { + Document nextPage = super.getNextPage(page); + switch (urlType) { + case LIST: + if (!page.select(".loadmoreitems").isEmpty()) { + // Not all items are loaded yet. + // Load remaining items using postUrl. 
+ + String offSet = page.select(".loadmoreitems").last().attr("data-offset"); + Map postParams = new HashMap<>(); + postParams.put("listid", listId); + postParams.put("offset", offSet); + try { + nextPage = Http.url(postUrl).data(postParams).retries(3).post(); + } catch (IOException e1) { + LOGGER.error("Failed to load more images after " + offSet, e1); + throw e1; + } + } + break; + + case FOLDER: + Elements pageLinks = page.select(".pages a"); + if (!pageLinks.isEmpty() && pageLinks.last().text().startsWith("Next")) { + String nextUrl = pageLinks.last().attr("abs:href"); + nextPage = Http.url(nextUrl).retries(3).get(); + } + break; + + case UNKNOWN: + default: + } + return nextPage; } - private List getURLsFromList(Document page) { - // recursive method for url type: https://www.listal.com/list/my-list + @Override + public DownloadThreadPool getThreadPool() { + return listalThreadPool; + } + + /** + * Returns the image urls for UrlType LIST. + */ + private List getURLsForListType(Document page) { List list = new ArrayList<>(); for (Element e : page.select(".pure-g a[href*=viewimage]")) { //list.add("https://www.listal.com" + e.attr("href") + "h"); list.add(e.attr("abs:href") + "h"); } - if (!page.select(".loadmoreitems").isEmpty()) { - // All items are not loaded. - // Load remaining items using postUrl. - - String offSet = page.select(".loadmoreitems").last().attr("data-offset"); - Map postParams = new HashMap<>(); - postParams.put("listid", listId); - postParams.put("offset", offSet); - try { - list.addAll(getURLsFromList(Http.url(postUrl).data(postParams).retries(3).post())); - } catch (IOException e1) { - LOGGER.error("Failed to load more images after " + offSet, e1); - } - } - return list; } - @Override - public DownloadThreadPool getThreadPool() { - return listalThreadPool; + /** + * Returns the image urls for UrlType FOLDER. 
+ */ + private List getURLsForFolderType(Document page) { + List list = new ArrayList<>(); + for (Element e : page.select("#browseimagescontainer .imagewrap-outer a")) { + list.add(e.attr("abs:href") + "h"); + } + return list; + } + + /** + * Returns the gid for url type listal.com/tvOrSomething/dexter/pictures + */ + public String getFolderTypeGid(String group) throws MalformedURLException { + String[] folders = group.split("/"); + try { + if (folders.length == 2 && folders[1].equals("pictures")) { + // Url is probably for an actor. + return folders[0]; + } + + if (folders.length == 3 && folders[2].equals("pictures")) { + // Url is for a folder (like movies, tv etc). + Document doc = Http.url(url).get(); + return doc.select(".itemheadingmedium").first().text(); + } + + } catch (Exception e) { + LOGGER.error(e); + } + throw new MalformedURLException("Unable to fetch the gid for given url."); } private class ListalImageDownloadThread extends Thread { @@ -161,4 +227,8 @@ public class ListalRipper extends AbstractHTMLRipper { return name + ".jpg"; } } + + private static enum UrlType { + LIST, FOLDER, UNKNOWN + } } diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ListalRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ListalRipperTest.java index 906a1ba8..aba41af3 100644 --- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ListalRipperTest.java +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ListalRipperTest.java @@ -6,10 +6,24 @@ import com.rarchives.ripme.ripper.rippers.ListalRipper; public class ListalRipperTest extends RippersTest { - public void testRip() throws IOException { + /** + * Test for list type url. + * @throws IOException + */ + public void testRipListType() throws IOException { ListalRipper ripper = new ListalRipper(new URL("https://www.listal.com/list/evolution-emma-stone")); testRipper(ripper); } + /** + * Test for folder type url. 
+ * @throws IOException + */ + public void testRipFolderType() throws IOException { + ListalRipper ripper = + new ListalRipper(new URL("https://www.listal.com/chet-atkins/pictures")); + testRipper(ripper); + } + }