
rename OUTPUT_DIR to DATA_DIR

Author: Nick Sweeting
Date:   2024-09-30 17:44:18 -07:00
Parent: 363a499289
Commit: b913e6f426

28 changed files with 128 additions and 138 deletions
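
Most of the diff below repeats one pattern across the CLI subcommands: the legacy import "from ..config.legacy import OUTPUT_DIR" becomes "from archivebox.config import DATA_DIR", and "out_dir=pwd or OUTPUT_DIR" becomes "out_dir=Path(pwd) if pwd else DATA_DIR", so a caller-supplied pwd string is always coerced to a Path before use. A minimal standalone sketch of that fallback logic (the resolve_out_dir helper and the DATA_DIR = Path.cwd() default are illustrative stand-ins, not part of the commit):

from pathlib import Path

# illustrative stand-in for archivebox.config.DATA_DIR (the real value comes from config.constants)
DATA_DIR = Path.cwd()

def resolve_out_dir(pwd: str | None) -> Path:
    # old pattern: out_dir = pwd or OUTPUT_DIR  (returned a plain str whenever pwd was given)
    # new pattern: coerce pwd to a Path, otherwise fall back to DATA_DIR
    return Path(pwd) if pwd else DATA_DIR

print(resolve_out_dir(None))      # -> the DATA_DIR default
print(resolve_out_dir('./data'))  # -> Path('./data')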

View File

@@ -16,7 +16,7 @@ if str(PACKAGE_DIR) not in sys.path:
 from .config.constants import CONSTANTS, VERSION, PACKAGE_DIR, DATA_DIR, ARCHIVE_DIR # noqa
-os.environ['OUTPUT_DIR'] = str(DATA_DIR)
+os.environ['ARCHIVEBOX_DATA_DIR'] = str(DATA_DIR)
 os.environ['DJANGO_SETTINGS_MODULE'] = 'core.settings'
 # print('INSTALLING MONKEY PATCHES')

View File

@@ -8,10 +8,11 @@ import argparse
 from typing import List, Optional, IO
-from ..main import add
 from archivebox.misc.util import docstring
+from archivebox.config import DATA_DIR, ARCHIVING_CONFIG
+from ..main import add
 from ..parsers import PARSERS
-from ..config.legacy import OUTPUT_DIR, ONLY_NEW
 from ..logging_util import SmartFormatter, accept_stdin, stderr
@@ -32,7 +33,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
 parser.add_argument(
 '--update', #'-u',
 action='store_true',
-default=not ONLY_NEW, # when ONLY_NEW=True we skip updating old links
+default=not ARCHIVING_CONFIG.ONLY_NEW, # when ONLY_NEW=True we skip updating old links
 help="Also retry previously skipped/failed links when adding new links",
 )
 parser.add_argument(
@@ -117,7 +118,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
 init=command.init,
 extractors=command.extract,
 parser=command.parser,
-out_dir=pwd or OUTPUT_DIR,
+out_dir=pwd or DATA_DIR,
 )

View File

@@ -5,12 +5,13 @@ __command__ = 'archivebox config'
 import sys
 import argparse
+from pathlib import Path
 from typing import Optional, List, IO
-from ..main import config
 from archivebox.misc.util import docstring
-from ..config.legacy import OUTPUT_DIR
+from archivebox.config import DATA_DIR
+from ..main import config
 from ..logging_util import SmartFormatter, accept_stdin
@@ -56,7 +57,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
 get=command.get,
 set=command.set,
 reset=command.reset,
-out_dir=pwd or OUTPUT_DIR,
+out_dir=Path(pwd) if pwd else DATA_DIR,
 )

View File

@@ -5,12 +5,12 @@ __command__ = 'archivebox help'
 import sys
 import argparse
+from pathlib import Path
 from typing import Optional, List, IO
-from ..main import help
 from archivebox.misc.util import docstring
-from ..config.legacy import OUTPUT_DIR
+from archivebox.config import DATA_DIR
+from ..main import help
 from ..logging_util import SmartFormatter, reject_stdin
@@ -25,7 +25,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
 parser.parse_args(args or ())
 reject_stdin(__command__, stdin)
-help(out_dir=pwd or OUTPUT_DIR)
+help(out_dir=Path(pwd) if pwd else DATA_DIR)
 if __name__ == '__main__':

View File

@@ -10,7 +10,7 @@ from typing import Optional, List, IO
 from ..main import init
 from archivebox.misc.util import docstring
-from ..config.legacy import OUTPUT_DIR
+from archivebox.config import DATA_DIR
 from ..logging_util import SmartFormatter, reject_stdin
@@ -44,7 +44,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
 force=command.force,
 quick=command.quick,
 setup=command.setup,
-out_dir=pwd or OUTPUT_DIR,
+out_dir=pwd or DATA_DIR,
 )

View File

@@ -5,12 +5,12 @@ __command__ = 'archivebox list'
 import sys
 import argparse
+from pathlib import Path
 from typing import Optional, List, IO
-from ..main import list_all
 from archivebox.misc.util import docstring
-from ..config.legacy import OUTPUT_DIR
+from archivebox.config import DATA_DIR
+from ..main import list_all
 from ..index import (
 LINK_FILTERS,
 get_indexed_folders,
@@ -131,7 +131,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
 json=command.json,
 html=command.html,
 with_headers=command.with_headers,
-out_dir=pwd or OUTPUT_DIR,
+out_dir=Path(pwd) if pwd else DATA_DIR,
 )
 raise SystemExit(not matching_folders)

View File

@@ -4,19 +4,19 @@ __package__ = 'archivebox.cli'
 __command__ = 'archivebox manage'
 import sys
+from pathlib import Path
 from typing import Optional, List, IO
-from ..main import manage
 from archivebox.misc.util import docstring
-from ..config.legacy import OUTPUT_DIR
+from archivebox.config import DATA_DIR
+from ..main import manage
 @docstring(manage.__doc__)
 def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None:
 manage(
 args=args,
-out_dir=pwd or OUTPUT_DIR,
+out_dir=Path(pwd) if pwd else DATA_DIR,
 )

View File

@@ -9,10 +9,10 @@ import argparse
 from pathlib import Path
 from typing import List, Optional, IO
-from ..main import oneshot
 from archivebox.misc.util import docstring
-from ..config.legacy import OUTPUT_DIR
+from archivebox.config import DATA_DIR
 from ..logging_util import SmartFormatter, accept_stdin, stderr
+from ..main import oneshot
 @docstring(oneshot.__doc__)
@@ -46,7 +46,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
 parser.add_argument(
 '--out-dir',
 type=str,
-default=OUTPUT_DIR,
+default=DATA_DIR,
 help= "Path to save the single archive folder to, e.g. ./example.com_archive"
 )
 command = parser.parse_args(args or ())

View File

@@ -5,13 +5,13 @@ __command__ = 'archivebox remove'
 import sys
 import argparse
+from pathlib import Path
 from typing import Optional, List, IO
-from ..main import remove
 from archivebox.misc.util import docstring
-from ..config.legacy import OUTPUT_DIR
+from archivebox.config import DATA_DIR
 from ..logging_util import SmartFormatter, accept_stdin
+from ..main import remove
 @docstring(remove.__doc__)
@@ -74,7 +74,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
 after=command.after,
 yes=command.yes,
 delete=command.delete,
-out_dir=pwd or OUTPUT_DIR,
+out_dir=Path(pwd) if pwd else DATA_DIR,
 )

View File

@@ -5,13 +5,13 @@ __command__ = 'archivebox schedule'
 import sys
 import argparse
+from pathlib import Path
 from typing import Optional, List, IO
-from ..main import schedule
 from archivebox.misc.util import docstring
-from ..config.legacy import OUTPUT_DIR
+from archivebox.config import DATA_DIR
 from ..logging_util import SmartFormatter, reject_stdin
+from ..main import schedule
 @docstring(schedule.__doc__)
@@ -108,7 +108,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
 overwrite=command.overwrite,
 update=command.update,
 import_path=command.import_path,
-out_dir=pwd or OUTPUT_DIR,
+out_dir=Path(pwd) if pwd else DATA_DIR,
 )

View File

@@ -5,13 +5,13 @@ __command__ = 'archivebox server'
 import sys
 import argparse
+from pathlib import Path
 from typing import Optional, List, IO
-from ..main import server
 from archivebox.misc.util import docstring
-from ..config.legacy import OUTPUT_DIR, BIND_ADDR
+from archivebox.config import DATA_DIR, SERVER_CONFIG
 from ..logging_util import SmartFormatter, reject_stdin
+from ..main import server
 @docstring(server.__doc__)
 def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None:
@@ -25,7 +25,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
 'runserver_args',
 nargs='*',
 type=str,
-default=[BIND_ADDR],
+default=[SERVER_CONFIG.BIND_ADDR],
 help='Arguments to pass to Django runserver'
 )
 parser.add_argument(
@@ -68,7 +68,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
 init=command.init,
 quick_init=command.quick_init,
 createsuperuser=command.createsuperuser,
-out_dir=pwd or OUTPUT_DIR,
+out_dir=Path(pwd) if pwd else DATA_DIR,
 )

View File

@@ -5,13 +5,13 @@ __command__ = 'archivebox setup'
 import sys
 import argparse
+from pathlib import Path
 from typing import Optional, List, IO
-from ..main import setup
 from archivebox.misc.util import docstring
-from ..config.legacy import OUTPUT_DIR
+from archivebox.config import DATA_DIR
 from ..logging_util import SmartFormatter, reject_stdin
+from ..main import setup
 @docstring(setup.__doc__)
@@ -32,7 +32,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
 setup(
 # force=command.force,
-out_dir=pwd or OUTPUT_DIR,
+out_dir=Path(pwd) if pwd else DATA_DIR,
 )

View File

@@ -5,13 +5,13 @@ __command__ = 'archivebox shell'
 import sys
 import argparse
+from pathlib import Path
 from typing import Optional, List, IO
-from ..main import shell
 from archivebox.misc.util import docstring
-from ..config.legacy import OUTPUT_DIR
+from archivebox.config import DATA_DIR
 from ..logging_util import SmartFormatter, reject_stdin
+from ..main import shell
 @docstring(shell.__doc__)
@@ -26,7 +26,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
 reject_stdin(__command__, stdin)
 shell(
-out_dir=pwd or OUTPUT_DIR,
+out_dir=Path(pwd) if pwd else DATA_DIR,
 )

View File

@@ -5,13 +5,13 @@ __command__ = 'archivebox status'
 import sys
 import argparse
+from pathlib import Path
 from typing import Optional, List, IO
-from ..main import status
 from archivebox.misc.util import docstring
-from ..config.legacy import OUTPUT_DIR
+from archivebox.config import DATA_DIR
 from ..logging_util import SmartFormatter, reject_stdin
+from ..main import status
 @docstring(status.__doc__)
@@ -25,7 +25,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
 parser.parse_args(args or ())
 reject_stdin(__command__, stdin)
-status(out_dir=pwd or OUTPUT_DIR)
+status(out_dir=Path(pwd) if pwd else DATA_DIR)
 if __name__ == '__main__':

View File

@@ -5,12 +5,11 @@ __command__ = 'archivebox update'
 import sys
 import argparse
+from pathlib import Path
 from typing import List, Optional, IO
-from ..main import update
 from archivebox.misc.util import docstring
-from ..config.legacy import OUTPUT_DIR
+from archivebox.config import DATA_DIR
 from ..index import (
 LINK_FILTERS,
 get_indexed_folders,
@@ -25,6 +24,7 @@ from ..index import (
 get_unrecognized_folders,
 )
 from ..logging_util import SmartFormatter, accept_stdin
+from ..main import update
 @docstring(update.__doc__)
@@ -127,7 +127,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
 status=command.status,
 after=command.after,
 before=command.before,
-out_dir=pwd or OUTPUT_DIR,
+out_dir=Path(pwd) if pwd else DATA_DIR,
 extractors=command.extract,
 )

View File

@@ -5,13 +5,13 @@ __command__ = 'archivebox version'
 import sys
 import argparse
+from pathlib import Path
 from typing import Optional, List, IO
-from ..main import version
 from archivebox.misc.util import docstring
-from ..config.legacy import OUTPUT_DIR
+from archivebox.config import DATA_DIR
 from ..logging_util import SmartFormatter, reject_stdin
+from ..main import version
 @docstring(version.__doc__)
@@ -32,7 +32,7 @@ def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional
 version(
 quiet=command.quiet,
-out_dir=pwd or OUTPUT_DIR,
+out_dir=Path(pwd) if pwd else DATA_DIR,
 )

View File

@@ -15,7 +15,7 @@ TEST_CONFIG = {
 'USE_COLOR': 'False',
 'SHOW_PROGRESS': 'False',
-'OUTPUT_DIR': 'data.tests',
+'DATA_DIR': 'data.tests',
 'SAVE_ARCHIVE_DOT_ORG': 'False',
 'SAVE_TITLE': 'False',
@@ -27,12 +27,12 @@ TEST_CONFIG = {
 'USE_YOUTUBEDL': 'False',
 }
-OUTPUT_DIR = 'data.tests'
+DATA_DIR = 'data.tests'
 os.environ.update(TEST_CONFIG)
 from ..main import init
 from ..index import load_main_index
-from ..config.legacy import (
+from archivebox.config.constants import (
 SQL_INDEX_FILENAME,
 JSON_INDEX_FILENAME,
 HTML_INDEX_FILENAME,
@@ -101,22 +101,22 @@ def output_hidden(show_failing=True):
 class TestInit(unittest.TestCase):
 def setUp(self):
-os.makedirs(OUTPUT_DIR, exist_ok=True)
+os.makedirs(DATA_DIR, exist_ok=True)
 def tearDown(self):
-shutil.rmtree(OUTPUT_DIR, ignore_errors=True)
+shutil.rmtree(DATA_DIR, ignore_errors=True)
 def test_basic_init(self):
 with output_hidden():
 archivebox_init.main([])
-assert (Path(OUTPUT_DIR) / SQL_INDEX_FILENAME).exists()
+assert (Path(DATA_DIR) / SQL_INDEX_FILENAME).exists()
-assert (Path(OUTPUT_DIR) / JSON_INDEX_FILENAME).exists()
+assert (Path(DATA_DIR) / JSON_INDEX_FILENAME).exists()
-assert (Path(OUTPUT_DIR) / HTML_INDEX_FILENAME).exists()
+assert (Path(DATA_DIR) / HTML_INDEX_FILENAME).exists()
-assert len(load_main_index(out_dir=OUTPUT_DIR)) == 0
+assert len(load_main_index(out_dir=DATA_DIR)) == 0
 def test_conflicting_init(self):
-with open(Path(OUTPUT_DIR) / 'test_conflict.txt', 'w+', encoding='utf-8') as f:
+with open(Path(DATA_DIR) / 'test_conflict.txt', 'w+', encoding='utf-8') as f:
 f.write('test')
 try:
@@ -126,11 +126,11 @@ class TestInit(unittest.TestCase):
 except SystemExit:
 pass
-assert not (Path(OUTPUT_DIR) / SQL_INDEX_FILENAME).exists()
+assert not (Path(DATA_DIR) / SQL_INDEX_FILENAME).exists()
-assert not (Path(OUTPUT_DIR) / JSON_INDEX_FILENAME).exists()
+assert not (Path(DATA_DIR) / JSON_INDEX_FILENAME).exists()
-assert not (Path(OUTPUT_DIR) / HTML_INDEX_FILENAME).exists()
+assert not (Path(DATA_DIR) / HTML_INDEX_FILENAME).exists()
 try:
-load_main_index(out_dir=OUTPUT_DIR)
+load_main_index(out_dir=DATA_DIR)
 assert False, 'load_main_index should raise an exception when no index is present'
 except Exception:
 pass
@@ -138,36 +138,36 @@ class TestInit(unittest.TestCase):
 def test_no_dirty_state(self):
 with output_hidden():
 init()
-shutil.rmtree(OUTPUT_DIR, ignore_errors=True)
+shutil.rmtree(DATA_DIR, ignore_errors=True)
 with output_hidden():
 init()
 class TestAdd(unittest.TestCase):
 def setUp(self):
-os.makedirs(OUTPUT_DIR, exist_ok=True)
+os.makedirs(DATA_DIR, exist_ok=True)
 with output_hidden():
 init()
 def tearDown(self):
-shutil.rmtree(OUTPUT_DIR, ignore_errors=True)
+shutil.rmtree(DATA_DIR, ignore_errors=True)
 def test_add_arg_url(self):
 with output_hidden():
 archivebox_add.main(['https://getpocket.com/users/nikisweeting/feed/all'])
-all_links = load_main_index(out_dir=OUTPUT_DIR)
+all_links = load_main_index(out_dir=DATA_DIR)
 assert len(all_links) == 30
 def test_add_arg_file(self):
-test_file = Path(OUTPUT_DIR) / 'test.txt'
+test_file = Path(DATA_DIR) / 'test.txt'
 with open(test_file, 'w+', encoding='utf') as f:
 f.write(test_urls)
 with output_hidden():
 archivebox_add.main([test_file])
-all_links = load_main_index(out_dir=OUTPUT_DIR)
+all_links = load_main_index(out_dir=DATA_DIR)
 assert len(all_links) == 12
 os.remove(test_file)
@@ -175,40 +175,40 @@ class TestAdd(unittest.TestCase):
 with output_hidden():
 archivebox_add.main([], stdin=test_urls)
-all_links = load_main_index(out_dir=OUTPUT_DIR)
+all_links = load_main_index(out_dir=DATA_DIR)
 assert len(all_links) == 12
 class TestRemove(unittest.TestCase):
 def setUp(self):
-os.makedirs(OUTPUT_DIR, exist_ok=True)
+os.makedirs(DATA_DIR, exist_ok=True)
 with output_hidden():
 init()
 archivebox_add.main([], stdin=test_urls)
 # def tearDown(self):
-# shutil.rmtree(OUTPUT_DIR, ignore_errors=True)
+# shutil.rmtree(DATA_DIR, ignore_errors=True)
 def test_remove_exact(self):
 with output_hidden():
 archivebox_remove.main(['--yes', '--delete', 'https://example5.com/'])
-all_links = load_main_index(out_dir=OUTPUT_DIR)
+all_links = load_main_index(out_dir=DATA_DIR)
 assert len(all_links) == 11
 def test_remove_regex(self):
 with output_hidden():
 archivebox_remove.main(['--yes', '--delete', '--filter-type=regex', r'http(s)?:\/\/(.+\.)?(example\d\.com)'])
-all_links = load_main_index(out_dir=OUTPUT_DIR)
+all_links = load_main_index(out_dir=DATA_DIR)
 assert len(all_links) == 4
 def test_remove_domain(self):
 with output_hidden():
 archivebox_remove.main(['--yes', '--delete', '--filter-type=domain', 'example5.com', 'example6.com'])
-all_links = load_main_index(out_dir=OUTPUT_DIR)
+all_links = load_main_index(out_dir=DATA_DIR)
 assert len(all_links) == 10
 def test_remove_none(self):

View File

@@ -36,7 +36,6 @@ class ConfigDict(BaseConfig, benedict, total=False):
 IN_DOCKER: bool
 PACKAGE_DIR: Path
-OUTPUT_DIR: Path
 CONFIG_FILE: Path
 ONLY_NEW: bool
 TIMEOUT: int

View File

@@ -60,7 +60,6 @@ class ConstantsDict(Mapping):
 LIB_DIR_NAME: str = 'lib'
 TMP_DIR_NAME: str = 'tmp'
-OUTPUT_DIR: Path = DATA_DIR
 ARCHIVE_DIR: Path = DATA_DIR / ARCHIVE_DIR_NAME
 SOURCES_DIR: Path = DATA_DIR / SOURCES_DIR_NAME
 PERSONAS_DIR: Path = DATA_DIR / PERSONAS_DIR_NAME

View File

@@ -44,7 +44,7 @@ import django
 from django.db.backends.sqlite3.base import Database as sqlite3
-from .constants import CONSTANTS, TIMEZONE, OUTPUT_DIR
+from .constants import CONSTANTS, TIMEZONE
 from .constants import *
 from .config_stubs import (
 ConfigValue,
@@ -57,8 +57,9 @@ from ..misc.logging import (
 )
 from .defaults import SHELL_CONFIG, GENERAL_CONFIG, ARCHIVING_CONFIG, SERVER_CONFIG, SEARCH_BACKEND_CONFIG, STORAGE_CONFIG
-from ..plugins_auth.ldap.apps import LDAP_CONFIG
+from archivebox.plugins_auth.ldap.apps import LDAP_CONFIG
-from ..plugins_extractor.favicon.apps import FAVICON_CONFIG
+from archivebox.plugins_extractor.favicon.apps import FAVICON_CONFIG
 ANSI = SHELL_CONFIG.ANSI
 LDAP = LDAP_CONFIG.LDAP_ENABLED
@@ -331,7 +332,7 @@ def load_config_val(key: str,
 def load_config_file(out_dir: str | None=CONSTANTS.DATA_DIR) -> Optional[benedict]:
-"""load the ini-formatted config file from OUTPUT_DIR/Archivebox.conf"""
+"""load the ini-formatted config file from DATA_DIR/Archivebox.conf"""
 config_path = CONSTANTS.CONFIG_FILE
 if config_path.exists():
@@ -351,7 +352,7 @@ def load_config_file(out_dir: str | None=CONSTANTS.DATA_DIR) -> Optional[benedic
 def write_config_file(config: Dict[str, str], out_dir: str | None=CONSTANTS.DATA_DIR) -> benedict:
-"""load the ini-formatted config file from OUTPUT_DIR/Archivebox.conf"""
+"""load the ini-formatted config file from DATA_DIR/Archivebox.conf"""
 from archivebox.misc.system import atomic_write
@@ -785,7 +786,7 @@ def bump_startup_progress_bar():
 def setup_django_minimal():
 # sys.path.append(str(CONSTANTS.PACKAGE_DIR))
-# os.environ.setdefault('OUTPUT_DIR', str(CONSTANTS.DATA_DIR))
+# os.environ.setdefault('ARCHIVEBOX_DATA_DIR', str(CONSTANTS.DATA_DIR))
 # os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'core.settings')
 # django.setup()
 raise Exception('dont use this anymore')

View File

@@ -21,8 +21,7 @@ from django import forms
 from signal_webhooks.admin import WebhookAdmin
 from signal_webhooks.utils import get_webhook_model
-from archivebox.config import VERSION
+from archivebox.config import VERSION, DATA_DIR
 from archivebox.misc.util import htmldecode, urldecode
 from core.models import Snapshot, ArchiveResult, Tag
@@ -536,11 +535,11 @@ class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
 links = [snapshot.as_link() for snapshot in queryset]
 if len(links) < 3:
 # run syncronously if there are only 1 or 2 links
-archive_links(links, overwrite=True, methods=('title','favicon'), out_dir=CONFIG.OUTPUT_DIR)
+archive_links(links, overwrite=True, methods=('title','favicon'), out_dir=DATA_DIR)
 messages.success(request, f"Title and favicon have been fetched and saved for {len(links)} URLs.")
 else:
 # otherwise run in a background worker
-result = bg_archive_links((links,), kwargs={"overwrite": True, "methods": ["title", "favicon"], "out_dir": CONFIG.OUTPUT_DIR})
+result = bg_archive_links((links,), kwargs={"overwrite": True, "methods": ["title", "favicon"], "out_dir": DATA_DIR})
 messages.success(
 request,
 mark_safe(f"Title and favicon are updating in the background for {len(links)} URLs. {result_url(result)}"),
@@ -552,7 +551,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
 def update_snapshots(self, request, queryset):
 links = [snapshot.as_link() for snapshot in queryset]
-result = bg_archive_links((links,), kwargs={"overwrite": False, "out_dir": CONFIG.OUTPUT_DIR})
+result = bg_archive_links((links,), kwargs={"overwrite": False, "out_dir": DATA_DIR})
 messages.success(
 request,
@@ -581,7 +580,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
 def overwrite_snapshots(self, request, queryset):
 links = [snapshot.as_link() for snapshot in queryset]
-result = bg_archive_links((links,), kwargs={"overwrite": True, "out_dir": CONFIG.OUTPUT_DIR})
+result = bg_archive_links((links,), kwargs={"overwrite": True, "out_dir": DATA_DIR})
 messages.success(
 request,
@@ -592,7 +591,7 @@ class SnapshotAdmin(SearchResultsAdminMixin, ABIDModelAdmin):
 description="☠️ Delete"
 )
 def delete_snapshots(self, request, queryset):
-remove(snapshots=queryset, yes=True, delete=True, out_dir=CONFIG.OUTPUT_DIR)
+remove(snapshots=queryset, yes=True, delete=True, out_dir=DATA_DIR)
 messages.success(
 request,
 mark_safe(f"Succesfully deleted {queryset.count()} Snapshots. Don't forget to scrub URLs from import logs (data/sources) and error logs (data/logs) if needed."),
@@ -732,7 +731,7 @@ class ArchiveResultAdmin(ABIDModelAdmin):
 )
 def output_summary(self, result):
-snapshot_dir = Path(CONFIG.OUTPUT_DIR) / str(result.pwd).split('data/', 1)[-1]
+snapshot_dir = Path(DATA_DIR) / str(result.pwd).split('data/', 1)[-1]
 output_str = format_html(
 '<pre style="display: inline-block">{}</pre><br/>',
 result.output,

View File

@@ -243,7 +243,7 @@ def write_main_index(links: List[Link], out_dir: Path=DATA_DIR, created_by_id: i
 log_indexing_process_finished()
 @enforce_types
-def load_main_index(out_dir: Path=DATA_DIR, warn: bool=True) -> List[Link]:
+def load_main_index(out_dir: Path | str=DATA_DIR, warn: bool=True) -> List[Link]:
 """parse and load existing index with any new links from import_path merged in"""
 from core.models import Snapshot
 try:

View File

@@ -8,18 +8,15 @@ from typing import List, Tuple, Iterator
 from django.db.models import QuerySet
 from django.db import transaction
-from .schema import Link
 from archivebox.misc.util import enforce_types, parse_date
-from ..config.legacy import (
+from archivebox.config import DATA_DIR, GENERAL_CONFIG
-OUTPUT_DIR,
-TAG_SEPARATOR_PATTERN,
-)
+from .schema import Link
 ### Main Links Index
 @enforce_types
-def parse_sql_main_index(out_dir: Path=OUTPUT_DIR) -> Iterator[Link]:
+def parse_sql_main_index(out_dir: Path=DATA_DIR) -> Iterator[Link]:
 from core.models import Snapshot
 return (
@@ -28,7 +25,7 @@ def parse_sql_main_index(out_dir: Path=OUTPUT_DIR) -> Iterator[Link]:
 )
 @enforce_types
-def remove_from_sql_main_index(snapshots: QuerySet, atomic: bool=False, out_dir: Path=OUTPUT_DIR) -> None:
+def remove_from_sql_main_index(snapshots: QuerySet, atomic: bool=False, out_dir: Path=DATA_DIR) -> None:
 if atomic:
 with transaction.atomic():
 return snapshots.delete()
@@ -44,7 +41,7 @@ def write_link_to_sql_index(link: Link, created_by_id: int | None=None):
 info['created_by_id'] = created_by_id or get_or_create_system_user_pk()
 tag_list = list(dict.fromkeys(
-tag.strip() for tag in re.split(TAG_SEPARATOR_PATTERN, link.tags or '')
+tag.strip() for tag in re.split(GENERAL_CONFIG.TAG_SEPARATOR_PATTERN, link.tags or '')
 ))
 info.pop('tags')
@@ -95,7 +92,7 @@ def write_link_to_sql_index(link: Link, created_by_id: int | None=None):
 @enforce_types
-def write_sql_main_index(links: List[Link], out_dir: Path=OUTPUT_DIR, created_by_id: int | None=None) -> None:
+def write_sql_main_index(links: List[Link], out_dir: Path=DATA_DIR, created_by_id: int | None=None) -> None:
 for link in links:
 # with transaction.atomic():
 # write_link_to_sql_index(link)
@@ -103,7 +100,7 @@ def write_sql_main_index(links: List[Link], out_dir: Path=OUTPUT_DIR, created_by
 @enforce_types
-def write_sql_link_details(link: Link, out_dir: Path=OUTPUT_DIR, created_by_id: int | None=None) -> None:
+def write_sql_link_details(link: Link, out_dir: Path=DATA_DIR, created_by_id: int | None=None) -> None:
 from core.models import Snapshot
 # with transaction.atomic():
@@ -120,7 +117,7 @@ def write_sql_link_details(link: Link, out_dir: Path=OUTPUT_DIR, created_by_id:
 snap.title = link.title
 tag_list = list(
-{tag.strip() for tag in re.split(TAG_SEPARATOR_PATTERN, link.tags or '')}
+{tag.strip() for tag in re.split(GENERAL_CONFIG.TAG_SEPARATOR_PATTERN, link.tags or '')}
 | set(snap.tags.values_list('name', flat=True))
 )
@@ -130,7 +127,7 @@ def write_sql_link_details(link: Link, out_dir: Path=OUTPUT_DIR, created_by_id:
 @enforce_types
-def list_migrations(out_dir: Path=OUTPUT_DIR) -> List[Tuple[bool, str]]:
+def list_migrations(out_dir: Path=DATA_DIR) -> List[Tuple[bool, str]]:
 from django.core.management import call_command
 out = StringIO()
 call_command("showmigrations", list=True, stdout=out)
@@ -146,7 +143,7 @@ def list_migrations(out_dir: Path=OUTPUT_DIR) -> List[Tuple[bool, str]]:
 return migrations
 @enforce_types
-def apply_migrations(out_dir: Path=OUTPUT_DIR) -> List[str]:
+def apply_migrations(out_dir: Path=DATA_DIR) -> List[str]:
 from django.core.management import call_command
 out1, out2 = StringIO(), StringIO()
@@ -160,6 +157,6 @@ def apply_migrations(out_dir: Path=OUTPUT_DIR) -> List[str]:
 ]
 @enforce_types
-def get_admins(out_dir: Path=OUTPUT_DIR) -> List[str]:
+def get_admins(out_dir: Path=DATA_DIR) -> List[str]:
 from django.contrib.auth.models import User
 return User.objects.filter(is_superuser=True)

View File

@@ -13,7 +13,6 @@ IN_DOCKER=False
 IN_QEMU=False
 PUID=501
 PGID=20
-OUTPUT_DIR=/opt/archivebox/data
 CONFIG_FILE=/opt/archivebox/data/ArchiveBox.conf
 ONLY_NEW=True
 TIMEOUT=60
@@ -173,7 +172,6 @@ IN_DOCKER = false
 IN_QEMU = false
 PUID = 501
 PGID = 20
-OUTPUT_DIR = "/opt/archivebox/data"
 CONFIG_FILE = "/opt/archivebox/data/ArchiveBox.conf"
 ONLY_NEW = true
 TIMEOUT = 60

View File

@@ -13,21 +13,16 @@ from typing import IO, Tuple, List, Optional
 from datetime import datetime, timezone
 from pathlib import Path
+from archivebox.config import DATA_DIR, CONSTANTS, SHELL_CONFIG, ARCHIVING_CONFIG
 from archivebox.misc.system import atomic_write
-from ..config.legacy import (
+from archivebox.misc.logging import stderr, hint
-ANSI,
-OUTPUT_DIR,
-SOURCES_DIR_NAME,
-TIMEOUT,
-stderr,
-hint,
-)
 from archivebox.misc.util import (
 basename,
 htmldecode,
 download_url,
 enforce_types,
 )
 from ..index.schema import Link
 from ..logging_util import TimedProgress, log_source_saved
@@ -38,7 +33,6 @@ from . import pocket_html
 from . import pinboard_rss
 from . import shaarli_rss
 from . import medium_rss
 from . import netscape_html
 from . import generic_rss
 from . import generic_json
@@ -79,7 +73,7 @@ def parse_links_memory(urls: List[str], root_url: Optional[str]=None):
 parse a list of URLS without touching the filesystem
 """
-timer = TimedProgress(TIMEOUT * 4)
+timer = TimedProgress(ARCHIVING_CONFIG.TIMEOUT * 4)
 #urls = list(map(lambda x: x + "\n", urls))
 file = StringIO()
 file.writelines(urls)
@@ -98,7 +92,7 @@ def parse_links(source_file: str, root_url: Optional[str]=None, parser: str="aut
 RSS feed, bookmarks export, or text file
 """
-timer = TimedProgress(TIMEOUT * 4)
+timer = TimedProgress(ARCHIVING_CONFIG.TIMEOUT * 4)
 with open(source_file, 'r', encoding='utf-8') as file:
 links, parser = run_parser_functions(file, timer, root_url=root_url, parser=parser)
@@ -148,9 +142,9 @@ def run_parser_functions(to_parse: IO[str], timer, root_url: Optional[str]=None,
 @enforce_types
-def save_text_as_source(raw_text: str, filename: str='{ts}-stdin.txt', out_dir: Path=OUTPUT_DIR) -> str:
+def save_text_as_source(raw_text: str, filename: str='{ts}-stdin.txt', out_dir: Path=DATA_DIR) -> str:
 ts = str(datetime.now(timezone.utc).timestamp()).split('.', 1)[0]
-source_path = str(out_dir / SOURCES_DIR_NAME / filename.format(ts=ts))
+source_path = str(CONSTANTS.SOURCES_DIR / filename.format(ts=ts))
 referenced_texts = ''
@@ -167,10 +161,10 @@ def save_text_as_source(raw_text: str, filename: str='{ts}-stdin.txt', out_dir:
 @enforce_types
-def save_file_as_source(path: str, timeout: int=TIMEOUT, filename: str='{ts}-{basename}.txt', out_dir: Path=OUTPUT_DIR) -> str:
+def save_file_as_source(path: str, timeout: int=ARCHIVING_CONFIG.TIMEOUT, filename: str='{ts}-{basename}.txt', out_dir: Path=DATA_DIR) -> str:
 """download a given url's content into output/sources/domain-<timestamp>.txt"""
 ts = str(datetime.now(timezone.utc).timestamp()).split('.', 1)[0]
-source_path = str(OUTPUT_DIR / SOURCES_DIR_NAME / filename.format(basename=basename(path), ts=ts))
+source_path = str(CONSTANTS.SOURCES_DIR / filename.format(basename=basename(path), ts=ts))
 if any(path.startswith(s) for s in ('http://', 'https://', 'ftp://')):
 # Source is a URL that needs to be downloaded
@@ -183,9 +177,9 @@ def save_file_as_source(path: str, timeout: int=TIMEOUT, filename: str='{ts}-{ba
 except Exception as e:
 timer.end()
 print('{}[!] Failed to download {}{}\n'.format(
-ANSI['red'],
+SHELL_CONFIG.ANSI['red'],
 path,
-ANSI['reset'],
+SHELL_CONFIG.ANSI['reset'],
 ))
 print(' ', e)
 raise e

View File

@@ -1,10 +1,11 @@
+import time
 import uuid
 from functools import wraps
 from django.db import connection, transaction
 from django.utils import timezone
 from huey.exceptions import TaskLockedException
+from archivebox.config import CONSTANTS
 class SqliteSemaphore:
 def __init__(self, db_path, table_name, name, value=1, timeout=None):
 self.db_path = db_path
@@ -68,7 +69,8 @@ class SqliteSemaphore:
 return cursor.rowcount > 0
-LOCKS_DB_PATH = settings.CONFIG.OUTPUT_DIR / 'locks.sqlite3'
+LOCKS_DB_PATH = CONSTANTS.DATABASE_FILE.parent / 'locks.sqlite3'
 def lock_task_semaphore(db_path, table_name, lock_name, value=1, timeout=None):
 """

View File

@@ -2,7 +2,6 @@ from pathlib import Path
 from archivebox.config import DATA_DIR, CONSTANTS
-OUTPUT_DIR = DATA_DIR
 LOGS_DIR = CONSTANTS.LOGS_DIR
 TMP_DIR = CONSTANTS.TMP_DIR

View File

@@ -2,7 +2,7 @@
 socket = 127.0.0.1:3031
 chdir = ../
 http = 0.0.0.0:8001
-env = OUTPUT_DIR=./data
+env = DATA_DIR=./data
 wsgi-file = archivebox/core/wsgi.py
 processes = 4
 threads = 1