1
0
mirror of https://github.com/pirate/ArchiveBox.git synced 2025-08-17 20:01:44 +02:00

Updated Configuration (markdown)

Nick Sweeting
2021-07-06 23:59:35 -04:00
parent ae6529bc2f
commit 90edcd4227

@@ -109,8 +109,9 @@ When building your exclusion list, you can check whether a given URL matches you
```python
>>> import re
>>> URL_BLACKLIST = r'^http(s)?:\/\/(.+\.)?(youtube\.com)|(amazon\.com)\/.*$' # replace this with your regex to test
>>> test_url = 'https://test.youtube.com/example.php?abc=123'
>>> bool(re.compile(URL_BLACKLIST, re.IGNORECASE | re.UNICODE | re.MULTILINE).search(test_url))
>>> URL_BLACKLIST_PTN = re.compile(URL_BLACKLIST, re.IGNORECASE | re.UNICODE | re.MULTILINE)
>>> bool(URL_BLACKLIST_PTN.search('https://test.youtube.com/example.php?abc=123')) # replace this with the URL to test
True # this URL would not be archived because it matches the exclusion pattern
```
@@ -129,12 +130,12 @@ When building your blacklist, you can check whether a given URL matches your reg
```python
>>> import re
>>> URL_WHITELIST = r'^http(s)?:\/\/(.+)?example\.com\/?.*$' # replace this with your regex to test
>>> test_url = 'https://test.example.com/example.php?abc=123'
>>> bool(re.compile(URL_WHITELIST, re.IGNORECASE | re.UNICODE | re.MULTILINE).search(test_url))
>>> URL_WHITELIST_PTN = re.compile(URL_WHITELIST, re.IGNORECASE | re.UNICODE | re.MULTILINE)
>>> bool(URL_WHITELIST_PTN.search('https://test.example.com/example.php?abc=123'))
True # this URL would be archived
>>> test_url = 'https://test.youtube.com/example.php?abc=123'
>>> bool(re.compile(URL_WHITELIST, re.IGNORECASE | re.UNICODE | re.MULTILINE).search(test_url))
>>> bool(URL_WHITELIST_PTN.search('https://test.youtube.com/example.php?abc=123'))
False # this URL would be excluded from archiving
```