From 19ec16bf7f81c9af68aa62a69f6d71120bc56e8c Mon Sep 17 00:00:00 2001
From: 0x1f595 <0x1f595@users.noreply.github.com>
Date: Tue, 8 Oct 2019 21:20:00 -0600
Subject: [PATCH 1/2] Adding several Mastodon instance rippers

---
 .../ripme/ripper/rippers/ArtAlleyRipper.java  |  20 +++
 .../ripme/ripper/rippers/BaraagRipper.java    |  20 +++
 .../ripme/ripper/rippers/MastodonRipper.java  | 135 ++++++++++++++++++
 .../ripper/rippers/MastodonXyzRipper.java     |  20 +++
 .../ripme/ripper/rippers/PawooRipper.java     |  20 +++
 5 files changed, 215 insertions(+)
 create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/ArtAlleyRipper.java
 create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/BaraagRipper.java
 create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/MastodonRipper.java
 create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/MastodonXyzRipper.java
 create mode 100644 src/main/java/com/rarchives/ripme/ripper/rippers/PawooRipper.java

diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/ArtAlleyRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/ArtAlleyRipper.java
new file mode 100644
index 00000000..c70a64bc
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/ArtAlleyRipper.java
@@ -0,0 +1,20 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.URL;
+
+public class ArtAlleyRipper extends MastodonRipper {
+    public ArtAlleyRipper(URL url) throws IOException {
+        super(url);
+    }
+
+    @Override
+    public String getHost() {
+        return "artalley";
+    }
+
+    @Override
+    public String getDomain() {
+        return "artalley.social";
+    }
+}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/BaraagRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/BaraagRipper.java
new file mode 100644
index 00000000..8d39864c
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/BaraagRipper.java
@@ -0,0 +1,20 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.URL;
+
+public class BaraagRipper extends MastodonRipper {
+    public BaraagRipper(URL url) throws IOException {
+        super(url);
+    }
+
+    @Override
+    public String getHost() {
+        return "baraag";
+    }
+
+    @Override
+    public String getDomain() {
+        return "baraag.net";
+    }
+}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/MastodonRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/MastodonRipper.java
new file mode 100644
index 00000000..53f9401c
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/MastodonRipper.java
@@ -0,0 +1,135 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.json.JSONObject;
+import org.json.JSONArray;
+
+import com.rarchives.ripme.ripper.AbstractHTMLRipper;
+import com.rarchives.ripme.utils.Http;
+
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+/**
+ * Rips the media timeline of an account on a Mastodon instance.
+ *
+ * Subclasses support other instances by overriding {@link #getHost()} and
+ * {@link #getDomain()}; the scraping logic itself is shared.
+ */
+public class MastodonRipper extends AbstractHTMLRipper {
+    /** Matches the path of an account's media timeline, e.g. {@code /@user/media}. */
+    private static final Pattern MEDIA_PATH = Pattern.compile("^/@[a-zA-Z0-9_-]+/media/?$");
+
+    /** Media URL to attachment id, filled while parsing and read when downloading. */
+    private final Map<String, String> itemIDs = Collections.synchronizedMap(new HashMap<String, String>());
+
+    public MastodonRipper(URL url) throws IOException {
+        super(url);
+    }
+
+    @Override
+    public String getHost() {
+        return "mastodon";
+    }
+
+    @Override
+    public String getDomain() {
+        return "mastodon.social";
+    }
+
+    /**
+     * Builds the ripper's GID as {@code domain@username}.
+     *
+     * @throws MalformedURLException if the URL is not a profile or media URL on this instance
+     */
+    @Override
+    public String getGID(URL url) throws MalformedURLException {
+        // Quote the domain so its dots match literally instead of acting as regex wildcards.
+        Pattern p = Pattern.compile(
+                "^https?://(" + Pattern.quote(getDomain()) + ")/@([a-zA-Z0-9_-]+)(/media/?)?$");
+        Matcher m = p.matcher(url.toExternalForm());
+        if (m.matches()) {
+            // Return the text contained between () in the regex
+            return m.group(1) + "@" + m.group(2);
+        }
+        throw new MalformedURLException(
+                "Expected " + getDomain() + " URL format: " +
+                getDomain() + "/@username - got " + url + " instead");
+    }
+
+    /** Fetches the media timeline, appending {@code /media} to a bare profile URL. */
+    @Override
+    public Document getFirstPage() throws IOException {
+        if (MEDIA_PATH.matcher(url.getPath()).matches()) {
+            return Http.url(url).get();
+        }
+        return Http.url(url.toExternalForm().replaceAll("/$", "") + "/media").get();
+    }
+
+    /**
+     * Follows the last "load more" link on the page.
+     *
+     * @throws IOException if the page has no such link
+     */
+    @Override
+    public Document getNextPage(Document doc) throws IOException {
+        Elements hrefs = doc.select(".h-entry + .entry > a.load-more.load-gap");
+        if (hrefs.isEmpty()) {
+            throw new IOException("No more pages");
+        }
+        Element link = hrefs.last();
+        // Resolve relative links against the page; keep the raw value if that yields nothing.
+        String nextUrl = link.absUrl("href");
+        if (nextUrl.isEmpty()) {
+            nextUrl = link.attr("href");
+        }
+        sleep(500);
+        return Http.url(nextUrl).get();
+    }
+
+    /**
+     * Collects the media URLs from the page's MediaGallery components and
+     * records each attachment id for {@link #downloadURL(URL, int)}.
+     */
+    @Override
+    public List<String> getURLsFromPage(Document doc) {
+        List<String> result = new ArrayList<>();
+        for (Element el : doc.select("[data-component=\"MediaGallery\"]")) {
+            String props = el.attr("data-props");
+            if (props.isEmpty()) {
+                // No JSON to parse; new JSONObject("") would throw.
+                continue;
+            }
+            JSONArray arr = new JSONObject(props).getJSONArray("media");
+            for (int i = 0; i < arr.length(); i++) {
+                JSONObject media = arr.getJSONObject(i);
+                String mediaUrl = media.getString("url");
+                result.add(mediaUrl);
+                itemIDs.put(mediaUrl, media.getString("id"));
+            }
+        }
+        return result;
+    }
+
+    /**
+     * Queues the URL for download, passing the recorded attachment id plus "_"
+     * as the second argument (presumably a file-name prefix; confirm in the base class).
+     */
+    @Override
+    public void downloadURL(URL url, int index) {
+        String id = itemIDs.get(url.toString());
+        // Without this check an unknown URL would get the literal text "null_".
+        addURLToDownload(url, id == null ? "" : id + "_");
+    }
+}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/MastodonXyzRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/MastodonXyzRipper.java
new file mode 100644
index 00000000..8b621526
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/MastodonXyzRipper.java
@@ -0,0 +1,20 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.URL;
+
+public class MastodonXyzRipper extends MastodonRipper {
+    public MastodonXyzRipper(URL url) throws IOException {
+        super(url);
+    }
+
+    @Override
+    public String getHost() {
+        return "mastodonxyz";
+    }
+
+    @Override
+    public String getDomain() {
+        return "mastodon.xyz";
+    }
+}
diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/PawooRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/PawooRipper.java
new file mode 100644
index 00000000..8f5c8c37
--- /dev/null
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/PawooRipper.java
@@ -0,0 +1,20 @@
+package com.rarchives.ripme.ripper.rippers;
+
+import java.io.IOException;
+import java.net.URL;
+
+public class PawooRipper extends MastodonRipper {
+    public PawooRipper(URL url) throws IOException {
+        super(url);
+    }
+
+    @Override
+    public String getHost() {
+        return "pawoo";
+    }
+
+    @Override
+    public String getDomain() {
+        return "pawoo.net";
+    }
+}

From c9759f23d26e17b8615aac2fb38dfcbf1d0d9e81 Mon Sep 17 00:00:00 2001
From: 0x1f595 <0x1f595@users.noreply.github.com>
Date: Tue, 8 Oct 2019 21:55:08 -0600
Subject: [PATCH 2/2] Adding unit tests for Mastodon rippers

---
 .../tst/ripper/rippers/ArtAlleyRipperTest.java    | 15 +++++++++++++++
 .../tst/ripper/rippers/BaraagRipperTest.java      | 15 +++++++++++++++
 .../tst/ripper/rippers/MastodonRipperTest.java    | 15 +++++++++++++++
 .../tst/ripper/rippers/MastodonXyzRipperTest.java | 15 +++++++++++++++
 .../ripme/tst/ripper/rippers/PawooRipperTest.java | 15 +++++++++++++++
 5 files changed, 75 insertions(+)
 create mode 100644 src/test/java/com/rarchives/ripme/tst/ripper/rippers/ArtAlleyRipperTest.java
 create mode 100644 src/test/java/com/rarchives/ripme/tst/ripper/rippers/BaraagRipperTest.java
 create mode 100644 src/test/java/com/rarchives/ripme/tst/ripper/rippers/MastodonRipperTest.java
 create mode 100644 src/test/java/com/rarchives/ripme/tst/ripper/rippers/MastodonXyzRipperTest.java
 create mode 100644 src/test/java/com/rarchives/ripme/tst/ripper/rippers/PawooRipperTest.java

diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ArtAlleyRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ArtAlleyRipperTest.java
new file mode 100644
index 00000000..693ce619
--- /dev/null
+++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/ArtAlleyRipperTest.java
@@ -0,0 +1,15 @@
+package com.rarchives.ripme.tst.ripper.rippers;
+
+import java.io.IOException;
+import java.net.URL;
+
+import com.rarchives.ripme.ripper.rippers.ArtAlleyRipper;
+import org.junit.jupiter.api.Test;
+
+public class ArtAlleyRipperTest extends RippersTest {
+    @Test
+    public void testRip() throws IOException {
+        ArtAlleyRipper ripper = new ArtAlleyRipper(new URL("https://artalley.social/@curator/media"));
+        testRipper(ripper);
+    }
+}
diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/BaraagRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/BaraagRipperTest.java
new file mode 100644
index 00000000..57105a9a
--- /dev/null
+++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/BaraagRipperTest.java
@@ -0,0 +1,15 @@
+package com.rarchives.ripme.tst.ripper.rippers;
+
+import java.io.IOException;
+import java.net.URL;
+
+import com.rarchives.ripme.ripper.rippers.BaraagRipper;
+import org.junit.jupiter.api.Test;
+
+public class BaraagRipperTest extends RippersTest {
+    @Test
+    public void testRip() throws IOException {
+        BaraagRipper ripper = new BaraagRipper(new URL("https://baraag.net/@darkshadow777/media"));
+        testRipper(ripper);
+    }
+}
diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/MastodonRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/MastodonRipperTest.java
new file mode 100644
index 00000000..4b564300
--- /dev/null
+++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/MastodonRipperTest.java
@@ -0,0 +1,15 @@
+package com.rarchives.ripme.tst.ripper.rippers;
+
+import java.io.IOException;
+import java.net.URL;
+
+import com.rarchives.ripme.ripper.rippers.MastodonRipper;
+import org.junit.jupiter.api.Test;
+
+public class MastodonRipperTest extends RippersTest {
+    @Test
+    public void testRip() throws IOException {
+        MastodonRipper ripper = new MastodonRipper(new URL("https://mastodon.social/@pythonhub/media"));
+        testRipper(ripper);
+    }
+}
diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/MastodonXyzRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/MastodonXyzRipperTest.java
new file mode 100644
index 00000000..2f500d6a
--- /dev/null
+++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/MastodonXyzRipperTest.java
@@ -0,0 +1,15 @@
+package com.rarchives.ripme.tst.ripper.rippers;
+
+import java.io.IOException;
+import java.net.URL;
+
+import com.rarchives.ripme.ripper.rippers.MastodonXyzRipper;
+import org.junit.jupiter.api.Test;
+
+public class MastodonXyzRipperTest extends RippersTest {
+    @Test
+    public void testRip() throws IOException {
+        MastodonXyzRipper ripper = new MastodonXyzRipper(new URL("https://mastodon.xyz/@artwo/media"));
+        testRipper(ripper);
+    }
+}
diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/PawooRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/PawooRipperTest.java
new file mode 100644
index 00000000..730a965c
--- /dev/null
+++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/PawooRipperTest.java
@@ -0,0 +1,15 @@
+package com.rarchives.ripme.tst.ripper.rippers;
+
+import java.io.IOException;
+import java.net.URL;
+
+import com.rarchives.ripme.ripper.rippers.PawooRipper;
+import org.junit.jupiter.api.Test;
+
+public class PawooRipperTest extends RippersTest {
+    @Test
+    public void testRip() throws IOException {
+        PawooRipper ripper = new PawooRipper(new URL("https://pawoo.net/@halki/media"));
+        testRipper(ripper);
+    }
+}