From 07ee7a76c2dbb8a5ac4bc4f89910cf7a7915f7cb Mon Sep 17 00:00:00 2001 From: Tushar Date: Sun, 13 Jan 2019 20:52:24 +0530 Subject: [PATCH] Updated hqporner to support actress/category/studio/top links. --- .../ripme/ripper/rippers/HqpornerRipper.java | 348 +++++++++++++----- .../ripper/rippers/HqpornerRipperTest.java | 59 ++- 2 files changed, 307 insertions(+), 100 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/HqpornerRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/HqpornerRipper.java index 1fa27618..8297daa2 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/HqpornerRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/HqpornerRipper.java @@ -1,8 +1,13 @@ package com.rarchives.ripme.ripper.rippers; -import com.rarchives.ripme.ripper.AbstractSingleFileRipper; +import com.rarchives.ripme.ripper.AbstractHTMLRipper; +import com.rarchives.ripme.ripper.DownloadThreadPool; import com.rarchives.ripme.utils.Http; + +import org.jsoup.Connection.Response; import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; import java.io.IOException; import java.net.MalformedURLException; @@ -14,112 +19,279 @@ import java.util.regex.Pattern; import static com.rarchives.ripme.App.logger; -public class HqpornerRipper extends AbstractSingleFileRipper { +public class HqpornerRipper extends AbstractHTMLRipper { + private static final String VIDEO_URL_PREFIX = "https://hqporner.com"; - public HqpornerRipper(URL url) throws IOException { - super(url); - } + private Pattern p1 = Pattern.compile("https?://hqporner.com/hdporn/([a-zA-Z0-9_-]*).html/?$"); // video pattern. + private Pattern p2 = Pattern.compile("https://hqporner.com/([a-zA-Z0-9/_-]+)"); // category/top/actress/studio pattern. + private Pattern p3 = Pattern.compile("https?://[A-Za-z0-9/.-_]+\\.mp4"); // to match links ending with .mp4 + private DownloadThreadPool hqpornerThreadPool = new DownloadThreadPool("hqpornerThreadPool"); + private String subdirectory = ""; - private String getVideoFromMyDaddycc(String url) { - Pattern p = Pattern.compile("(//[a-zA-Z0-9\\.]+/pub/cid/[a-z0-9]+/1080.mp4)"); - try { - logger.info("Downloading " + url); - Document page = Http.url(url).referrer(url).get(); - Matcher m = p.matcher(page.html()); - logger.info(page.html()); - if (m.find()) { - return m.group(0); - } + public HqpornerRipper(URL url) throws IOException { + super(url); + } + @Override + public String getHost() { + return "hqporner"; + } - } catch (IOException e) { - logger.error("Unable to get page with video"); - } - return null; - } + @Override + public String getDomain() { + return "hqporner.com"; + } - private String getVideoFromFlyFlv(String url) { - try { - logger.info("Downloading " + url); - Document page = Http.url(url).referrer(url).get(); - String[] videoSizes = { "1080p","720p","360p"}; - for (String videoSize : videoSizes) { - String urlToReturn = page.select("video > source[label=" + videoSize).attr("src"); - if (urlToReturn != null && !urlToReturn.equals("")) { - return urlToReturn; - } - } + @Override + public String getGID(URL url) throws MalformedURLException { + Matcher m1 = p1.matcher(url.toExternalForm()); + if (m1.matches()) { + return m1.group(1); + } + Matcher m2 = p2.matcher(url.toExternalForm()); + if (m2.matches()) { + if (m2.group(1).indexOf('/') == -1) { + return m2.group(1); + } + return m2.group(1).substring(0, m2.group(1).indexOf('/')); //returns actress/category/top/studio + } + throw new MalformedURLException("Expected hqporner URL format: " + "hqporner.com/hdporn/NAME\n" + + "hqporner.com/category/myfavcategory\n" + "hqporner.com/actress/myfavactress\n" + + "hqporner.com/studio/myFavStudio\n" + " - got " + url + " instead."); + } + @Override + public Document getFirstPage() throws IOException { + // "url" is an instance field of the superclass + return Http.url(url).get(); + } - } catch (IOException e) { - logger.error("Unable to get page with video"); - } - return null; - } + @Override + public List getURLsFromPage(Document doc) { + List result = new ArrayList<>(); + Matcher m1 = p1.matcher(this.url.toExternalForm()); // video url. + Matcher m2 = p2.matcher(this.url.toExternalForm()); // category/top/actress/studio url. - private String getVideoName() { - try { - String filename = getGID(url); - return filename; - } catch (MalformedURLException e) { - return "1080"; - } - } + if (m1.matches()) { + //subdirectory = subdirectory + result.add(this.url.toExternalForm()); + return result; + } else if (m2.matches()) { + if (m2.group(1).indexOf('/') != -1) + subdirectory = m2.group(1).substring(m2.group(1).indexOf('/') + 1); + result = getAllVideoUrls(doc); + return result; + } + //empty array for rest. + return result; + } - @Override - public String getHost() { - return "hqporner"; - } + public List getAllVideoUrls(Document doc) { + // div.6u h3 a.click-trigger + List result = new ArrayList<>(); + Elements videoLinks = doc.select("div.6u h3 a.click-trigger"); + for (Element e : videoLinks) { + if (e.hasAttr("href")) { + result.add(VIDEO_URL_PREFIX + e.attr("href")); + } + } - @Override - public String getDomain() { - return "hqporner.com"; - } + return result; + } - @Override - public String getGID(URL url) throws MalformedURLException { - Pattern p = Pattern.compile("https?://hqporner.com/hdporn/([a-zA-Z0-9_-]*).html/?$"); - Matcher m = p.matcher(url.toExternalForm()); - if (m.matches()) { - return m.group(1); - } - throw new MalformedURLException("Expected hqporner URL format: " + - "hqporner.com/hdporn/NAME - got " + url + " instead"); - } + @Override + public boolean tryResumeDownload() { + return true; + } - @Override - public Document getFirstPage() throws IOException { - // "url" is an instance field of the superclass - return Http.url(url).get(); - } + @Override + public void downloadURL(URL url, int index) { + hqpornerThreadPool.addThread(new HqpornerDownloadThread(url, index, subdirectory)); + } - @Override - public List getURLsFromPage(Document doc) { - String videoUrl = null; - List result = new ArrayList<>(); - String videoPageUrl = "https:" + doc.select("div.videoWrapper > iframe").attr("src"); + @Override + public Document getNextPage(Document doc) throws IOException { + Elements pageNumbers = doc.select("ul.pagination a[href]"); + if (!pageNumbers.isEmpty() && pageNumbers.last().text().contains("Next")) { + return Http.url(VIDEO_URL_PREFIX + pageNumbers.last().attr("href")).get(); + } + throw new IOException("No next page found."); + } - if (videoPageUrl.contains("mydaddy")) { - videoUrl = getVideoFromMyDaddycc(videoPageUrl); - } else if (videoPageUrl.contains("flyflv")) { - videoUrl = getVideoFromFlyFlv(videoPageUrl); - } + @Override + protected DownloadThreadPool getThreadPool() { + return hqpornerThreadPool; + } - if (videoUrl != null) { - result.add("https:" + videoUrl); - } - return result; - } + private class HqpornerDownloadThread extends Thread { - @Override - public boolean tryResumeDownload() {return true;} + private URL hqpornerVideoPageUrl; + //private int index; + private String subdirectory; - @Override - public void downloadURL(URL url, int index) { - addURLToDownload(url, "", "", "", null, getVideoName(), "mp4"); - } + public HqpornerDownloadThread(URL url, int index, String subdirectory) { + this.hqpornerVideoPageUrl = url; + //this.index = index; + this.subdirectory = subdirectory; + } + @Override + public void run() { + fetchVideo(); + } + + public void fetchVideo() { + try { + + Document doc = Http.url(hqpornerVideoPageUrl).retries(3).get(); + String downloadUrl = null; + String videoPageUrl = "https:" + doc.select("div.videoWrapper > iframe").attr("src"); + + if (videoPageUrl.contains("mydaddy")) { + downloadUrl = getVideoFromMyDaddycc(videoPageUrl); + } else if (videoPageUrl.contains("flyflv")) { + downloadUrl = getVideoFromFlyFlv(videoPageUrl); + } else { + //trying a generic selector to grab video url. + downloadUrl = getVideoFromUnknown(videoPageUrl); + } + + if (downloadUrl != null) { + addURLToDownload(new URL(downloadUrl), "", subdirectory, "", null, getVideoName(), "mp4"); + } + + } catch (IOException e) { + LOGGER.error("[!] Exception while downloading video.", e); + } + } + + private String getVideoFromMyDaddycc(String videoPageUrl) { + Pattern p = Pattern.compile("(//[a-zA-Z0-9\\.]+/pub/cid/[a-z0-9]+/1080.mp4)"); + try { + logger.info("Downloading from mydaddy " + videoPageUrl); + Document page = Http.url(videoPageUrl).referrer(hqpornerVideoPageUrl).get(); + Matcher m = p.matcher(page.html()); + logger.info(page.html()); + if (m.find()) { + return "https:" + m.group(0); + } + + } catch (IOException e) { + logger.error("Unable to get page with video"); + } + return null; + } + + private String getVideoFromFlyFlv(String videoPageUrl) { + try { + logger.info("Downloading from flyflv " + videoPageUrl); + Document page = Http.url(videoPageUrl).referrer(hqpornerVideoPageUrl).get(); + String[] videoSizes = { "1080p", "720p", "360p" }; + for (String videoSize : videoSizes) { + String urlToReturn = page.select("video > source[label=" + videoSize).attr("src"); + if (urlToReturn != null && !urlToReturn.equals("")) { + return "https:" + urlToReturn; + } + } + + } catch (IOException e) { + logger.error("Unable to get page with video"); + } + return null; + } + + private String getVideoFromUnknown(String videoPageurl) { + // If video host is neither daddycc or flyflv TRY generic way. + // 1. Search any src$=.mp4 + // 2. Pattern match http(s)://.../../abcd.mp4 + // 3. GET all src link with same host and run 2. + + try { + logger.info("Trying to download from unknown video host " + videoPageurl); + URL url = new URL(videoPageurl); + Response response = Http.url(url).referrer(hqpornerVideoPageUrl).response(); + Document doc = response.parse(); + + // 1. Search for src$=.mp4 + Elements endingWithMp4 = doc.select("[src$=.mp4]"); + if (!endingWithMp4.isEmpty()) { + List list = new ArrayList<>(); + endingWithMp4.forEach((e) -> list.add(e.attr("src"))); + return getBestQualityLink(list); + } + + // 2. Pattern match https?://somehost.cc/example123/abcd.mp4 + String link = matchUrlByPattern(p3, doc.html()); + if (link != null) { + return link; + } + + // 3. GET all src link with same host and run 2. + link = null; + Elements allElementsWithSrc = doc.select("[src*=" + url.getHost() + "]"); //all urls from same host. + allElementsWithSrc = allElementsWithSrc.select("[src~=/[A-Za-z0-9_-]+$]"); // remove links with extensions( .js). + for (Element e : allElementsWithSrc) { + Document d = Http.url(e.attr("src")).referrer(url.getHost()).get(); + link = matchUrlByPattern(p3, d.html()); + if (link != null) { + return link; + } + } + + } catch (IOException e) { + logger.error("Unable to get video url using generic methods."); + } + + // RIP unknown ripper. + logger.error("Unable to get video url using generic methods."); + return null; + + } + + private String matchUrlByPattern(Pattern pattern, String html) { + // Step 2. function + Matcher m = pattern.matcher(html); + List list = new ArrayList<>(); + while (m.find()) { + list.add(m.group()); + } + if (!list.isEmpty()) { + return getBestQualityLink(list); + } + + return null; + } + + private String getVideoName() { + try { + String filename = getGID(hqpornerVideoPageUrl); + return filename; + } catch (MalformedURLException e) { + return "1080"; + } + } + + }// class HqpornerDownloadThread + + public String getBestQualityLink(List list) { + // return link with the highest quality subsubstring. Keeping it simple for now. + // 1080 > 720 > 480 > 360 > 240 + if (list.isEmpty()) { + return null; + } + + String[] qualities = { "2160", "2160p", "1440", "1440p", "1080", "1080p", "720", "720p", "480", "480p" }; + for (String quality : qualities) { + for (String s : list) { + if (s.contains(quality)) { + return s; + } + } + } + // Could not find the best link. Return fist link. + return list.get(0); + } } diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/HqpornerRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/HqpornerRipperTest.java index 99e64d0f..dc872336 100644 --- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/HqpornerRipperTest.java +++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/HqpornerRipperTest.java @@ -6,19 +6,54 @@ import com.rarchives.ripme.utils.Utils; import java.io.IOException; import java.net.URL; -public class HqpornerRipperTest extends RippersTest{ +public class HqpornerRipperTest extends RippersTest { - public void testRip() throws IOException { - if (Utils.getConfigBoolean("test.run_flaky_tests", false)) { - HqpornerRipper ripper = new HqpornerRipper(new URL("https://hqporner.com/hdporn/84636-pool_lesson_with_a_cheating_husband.html")); - testRipper(ripper); - } - } + public void testRip() throws IOException { + if (Utils.getConfigBoolean("test.run_flaky_tests", false)) { + HqpornerRipper ripper = new HqpornerRipper( + new URL("https://hqporner.com/hdporn/84636-pool_lesson_with_a_cheating_husband.html")); + testRipper(ripper); + } + } - public void testGetGID() throws IOException { - URL poolURL = new URL("https://hqporner.com/hdporn/84636-pool_lesson_with_a_cheating_husband.html"); - HqpornerRipper ripper = new HqpornerRipper(poolURL); - assertEquals("84636-pool_lesson_with_a_cheating_husband", ripper.getGID(poolURL)); - } + public void testGetGID() throws IOException { + URL poolURL = new URL("https://hqporner.com/hdporn/84636-pool_lesson_with_a_cheating_husband.html"); + HqpornerRipper ripper = new HqpornerRipper(poolURL); + assertEquals("84636-pool_lesson_with_a_cheating_husband", ripper.getGID(poolURL)); + } + public void testGetURLsFromPage() throws IOException { + URL actressUrl = new URL("https://hqporner.com/actress/kali-roses"); + HqpornerRipper ripper = new HqpornerRipper(actressUrl); + assert (ripper.getURLsFromPage(ripper.getFirstPage()).size() >= 2); + } + + public void testGetNextPage() throws IOException { + URL multiPageUrl = new URL("https://hqporner.com/category/tattooed"); + HqpornerRipper multiPageRipper = new HqpornerRipper(multiPageUrl); + assert (multiPageRipper.getNextPage(multiPageRipper.getFirstPage()) != null); + + URL singlePageUrl = new URL("https://hqporner.com/actress/amy-reid"); + HqpornerRipper ripper = new HqpornerRipper(singlePageUrl); + try { + ripper.getNextPage(ripper.getFirstPage()); + } catch (IOException e) { + assertEquals(e.getMessage(), "No next page found."); + } + } + + // public void testDifferentVideoHost() throws IOException { + // URL myDaddyUrl = new URL("https://hqporner.com/hdporn/90598-gangbang_with_Kali_Roses.html"); + // HqpornerRipper myDaddyRipper = new HqpornerRipper(myDaddyUrl); + // testRipper(myDaddyRipper); + // + // URL flyFlvUrl = new URL( + // "https://hqporner.com/hdporn/69862-bangbros_-_amy_reid_taking_off_a_tight_sexy_swimsuit.html"); + // HqpornerRipper flyFlvRipper = new HqpornerRipper(flyFlvUrl); + // testRipper(flyFlvRipper); + // + // URL unknownHostUrl = new URL("https://hqporner.com/hdporn/79528-Kayden_Kross_-_Serious_Masturbation.html"); // howq.cc + // HqpornerRipper unknownHostRipper = new HqpornerRipper(unknownHostUrl); + // testRipper(unknownHostRipper); + // } } \ No newline at end of file