mirror of
https://github.com/pirate/ArchiveBox.git
synced 2025-09-01 18:32:40 +02:00
Merge branch 'master' into readability-extractor
This commit is contained in:
141
Dockerfile
141
Dockerfile
@@ -1,90 +1,107 @@
|
||||
# This is the Dockerfile for ArchiveBox. It includes the following major pieces:
|
||||
# git, curl, wget, python3, youtube-dl, google-chrome-stable, ArchiveBox
|
||||
# This is the Dockerfile for ArchiveBox. It bundles the following dependencies:
|
||||
# python3, ArchiveBox, curl, wget, git, chromium, youtube-dl, single-file
|
||||
# Usage:
|
||||
# docker build . -t archivebox
|
||||
# docker build . -t archivebox --no-cache
|
||||
# docker run -v "$PWD/data":/data archivebox init
|
||||
# docker run -v "$PWD/data":/data archivebox add 'https://example.com'
|
||||
# Documentation:
|
||||
# https://github.com/pirate/ArchiveBox/wiki/Docker#docker
|
||||
# docker run -v "$PWD/data":/data -it archivebox manage createsuperuser
|
||||
# docker run -v "$PWD/data":/data -p 8000:8000 archivebox server
|
||||
|
||||
FROM python:3.8-slim-buster
|
||||
|
||||
LABEL name="archivebox" \
|
||||
maintainer="Nick Sweeting <archivebox-git@sweeting.me>" \
|
||||
description="All-in-one personal internet archiving container"
|
||||
maintainer="Nick Sweeting <archivebox-docker@sweeting.me>" \
|
||||
description="All-in-one personal internet archiving container" \
|
||||
homepage="https://github.com/pirate/ArchiveBox" \
|
||||
documentation="https://github.com/pirate/ArchiveBox/wiki/Docker#docker"
|
||||
|
||||
# System-level base config
|
||||
ENV TZ=UTC \
|
||||
LANGUAGE=en_US:en \
|
||||
LC_ALL=C.UTF-8 \
|
||||
LANG=C.UTF-8 \
|
||||
PYTHONIOENCODING=UTF-8 \
|
||||
PYTHONUNBUFFERED=1 \
|
||||
APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=1 \
|
||||
CODE_PATH=/app \
|
||||
DEBIAN_FRONTEND=noninteractive \
|
||||
APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=1
|
||||
|
||||
# Application-level base config
|
||||
ENV CODE_DIR=/app \
|
||||
VENV_PATH=/venv \
|
||||
DATA_PATH=/data \
|
||||
EXTRA_PATH=/extra
|
||||
DATA_DIR=/data \
|
||||
NODE_DIR=/node \
|
||||
ARCHIVEBOX_USER="archivebox"
|
||||
|
||||
# First install CLI utils and base deps, then Chrome + Fonts + nodejs
|
||||
RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections \
|
||||
# Create non-privileged user for archivebox and chrome
|
||||
RUN groupadd --system $ARCHIVEBOX_USER \
|
||||
&& useradd --system --create-home --gid $ARCHIVEBOX_USER --groups audio,video $ARCHIVEBOX_USER
|
||||
|
||||
# Install system dependencies
|
||||
RUN apt-get update -qq \
|
||||
&& apt-get install -qq -y --no-install-recommends \
|
||||
apt-transport-https ca-certificates gnupg2 zlib1g-dev \
|
||||
dumb-init gosu unzip curl \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install apt dependencies
|
||||
RUN apt-get update -qq \
|
||||
&& apt-get install -qq -y --no-install-recommends \
|
||||
wget curl chromium git ffmpeg youtube-dl \
|
||||
fontconfig fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-kacst fonts-symbola fonts-noto fonts-freefont-ttf \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Install Node environment
|
||||
RUN curl -s https://deb.nodesource.com/gpgkey/nodesource.gpg.key | apt-key add - \
|
||||
&& echo 'deb https://deb.nodesource.com/node_14.x buster main' >> /etc/apt/sources.list \
|
||||
&& apt-get update -qq \
|
||||
&& apt-get install -qq -y --no-install-recommends \
|
||||
apt-transport-https ca-certificates apt-utils gnupg gosu gnupg2 libgconf-2-4 zlib1g-dev \
|
||||
dumb-init jq git wget curl youtube-dl ffmpeg \
|
||||
&& curl -sSL "https://dl.google.com/linux/linux_signing_key.pub" | apt-key add - \
|
||||
&& echo "deb https://dl.google.com/linux/chrome/deb/ stable main" > /etc/apt/sources.list.d/google-chrome.list \
|
||||
&& curl -sL https://deb.nodesource.com/setup_14.x | bash - \
|
||||
&& apt-get update -qq \
|
||||
&& apt-get install -qq -y --no-install-recommends \
|
||||
google-chrome-stable \
|
||||
fontconfig \
|
||||
fonts-ipafont-gothic \
|
||||
fonts-wqy-zenhei \
|
||||
fonts-thai-tlwg \
|
||||
fonts-kacst \
|
||||
fonts-symbola \
|
||||
fonts-noto \
|
||||
fonts-freefont-ttf \
|
||||
nodejs \
|
||||
unzip \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
nodejs \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Download SingleFile and readability-extractor into $EXTRA_PATH, install their npm dependencies, and make their CLIs executable so archivebox can find them
|
||||
|
||||
WORKDIR "$EXTRA_PATH"
|
||||
RUN wget -qO - https://github.com/gildas-lormeau/SingleFile/archive/master.zip > SingleFile.zip \
|
||||
&& unzip -q SingleFile.zip \
|
||||
&& npm install --prefix SingleFile-master/cli --production > /dev/null 2>&1 \
|
||||
&& chmod +x SingleFile-master/cli/single-file
|
||||
|
||||
RUN wget -qO - https://github.com/pirate/readability-extractor/archive/master.zip > readability.zip \
|
||||
&& unzip -q readability.zip \
|
||||
&& npm install --prefix readability-extractor-master --production > /dev/null 2>&1 \
|
||||
&& chmod +x readability-extractor-master/readability-extractor
|
||||
|
||||
# Create the non-privileged archivebox user and group
|
||||
RUN groupadd --system archivebox \
|
||||
&& useradd --system --create-home --gid archivebox --groups audio,video archivebox
|
||||
|
||||
ADD . "$CODE_PATH"
|
||||
WORKDIR "$CODE_PATH"
|
||||
# Install Python dependencies
|
||||
WORKDIR "$CODE_DIR"
|
||||
ENV PATH="${PATH}:$VENV_PATH/bin"
|
||||
RUN python -m venv --clear --symlinks "$VENV_PATH" \
|
||||
&& pip install --upgrade pip setuptools \
|
||||
&& pip install -e .
|
||||
&& pip install --upgrade --quiet pip setuptools
|
||||
ADD ./archivebox.egg-info/requires.txt "$CODE_DIR/archivebox.egg-info/requires.txt"
|
||||
RUN apt-get update -qq \
|
||||
&& apt-get install -qq -y --no-install-recommends \
|
||||
build-essential python-dev python3-dev \
|
||||
&& grep -B 1000 -E '^$' "$CODE_DIR/archivebox.egg-info/requires.txt" | pip install --quiet -r /dev/stdin \
|
||||
&& apt-get purge -y build-essential python-dev python3-dev \
|
||||
&& apt-get autoremove -y \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
VOLUME "$DATA_PATH"
|
||||
WORKDIR "$DATA_PATH"
|
||||
EXPOSE 8000
|
||||
# Install Node dependencies
|
||||
WORKDIR "$NODE_DIR"
|
||||
ENV PATH="${PATH}:$NODE_DIR/node_modules/.bin" \
|
||||
npm_config_loglevel=error
|
||||
RUN npm install -g npm
|
||||
ADD ./package.json ./package.json
|
||||
RUN npm install
|
||||
|
||||
# Install ArchiveBox Python package
|
||||
WORKDIR "$CODE_DIR"
|
||||
ADD . "$CODE_DIR"
|
||||
RUN pip install -e .
|
||||
|
||||
# Setup ArchiveBox runtime config
|
||||
WORKDIR "$DATA_DIR"
|
||||
ENV IN_DOCKER=True \
|
||||
CHROME_BINARY=google-chrome \
|
||||
CHROME_SANDBOX=False \
|
||||
USE_SINGLEFILE="true" \
|
||||
SINGLEFILE_BINARY="$EXTRA_PATH/SingleFile-master/cli/single-file" \
|
||||
USE_READABILITY="true" \
|
||||
READABILITY_BINARY="$EXTRA_PATH/readability-extractor-master/readability-extractor"
|
||||
CHROME_BINARY="chromium" \
|
||||
USE_SINGLEFILE=True \
|
||||
SINGLEFILE_BINARY="$NODE_DIR/node_modules/.bin/single-file" \
|
||||
USE_READABILITY=True \
|
||||
READABILITY_BINARY="$NODE_DIR/node_modules/.bin/readability-extractor"
|
||||
|
||||
RUN env ALLOW_ROOT=True archivebox version
|
||||
# Print the ArchiveBox version so the docker build ends with a nice summary
|
||||
RUN archivebox version
|
||||
|
||||
# Open up the interfaces to the outside world
|
||||
VOLUME "$DATA_DIR"
|
||||
VOLUME "$CODE_DIR"
|
||||
EXPOSE 8000
|
||||
|
||||
ENTRYPOINT ["dumb-init", "--", "/app/bin/docker_entrypoint.sh"]
|
||||
CMD ["archivebox", "server", "0.0.0.0:8000"]
|
||||
|
Reference in New Issue
Block a user