1
0
mirror of https://github.com/RipMeApp/ripme.git synced 2025-08-14 01:44:12 +02:00

Merge pull request #220 from cyian-1756/ig_regex

Improved instagram regex and added sanitizeURL
This commit is contained in:
cyian-1756
2017-11-18 08:53:09 -05:00
committed by GitHub

View File

@@ -43,6 +43,13 @@ public class InstagramRipper extends AbstractHTMLRipper {
return (url.getHost().endsWith("instagram.com"));
}
/**
 * Returns a copy of the given URL with any {@code ?hl=...} suffix removed.
 *
 * <p>Every occurrence of {@code ?hl=} together with the run of non-whitespace
 * characters that follows it is deleted from the URL's external form, and the
 * remaining text is parsed back into a {@link URL}. The result is logged at
 * info level before being returned.
 *
 * @param url the URL to clean up
 * @return a new URL built from the stripped text
 * @throws MalformedURLException if the stripped text cannot be parsed as a URL
 */
@Override
public URL sanitizeURL(URL url) throws MalformedURLException {
    // Drop "?hl=" and everything after it up to the next whitespace character.
    final String stripped = url.toExternalForm().replaceAll("\\?hl=\\S*", "");
    final URL sanitized = new URL(stripped);
    logger.info("sanitized URL is " + sanitized.toExternalForm());
    return sanitized;
}
@Override
public String getGID(URL url) throws MalformedURLException {
Pattern p = Pattern.compile("^https?://instagram.com/([^/]+)/?");
@@ -51,7 +58,7 @@ public class InstagramRipper extends AbstractHTMLRipper {
return m.group(1);
}
p = Pattern.compile("^https?://www.instagram.com/([^/]+)/?");
p = Pattern.compile("^https?://www.instagram.com/([^/]+)/?(?:\\?hl=\\S*)?/?");
m = p.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);