# syntax=docker/dockerfile:1.7

# Multistage ArchiveBox Dockerfile that consumes the abx-dl runtime image.
# abx-dl owns Python, Node, Chromium, and downloader plugin runtimes.
# ArchiveBox owns sonic, supervisor, Django, and the app runtime.
# Build abx-dl first, then point this file at it:
#   docker buildx build ../abx-dl -f ../abx-dl/Dockerfile \
#       --build-context abxbus=../abxbus \
#       --build-context abxpkg=../abxpkg \
#       --build-context abx-plugins=../abx-plugins \
#       -t archivebox/abx-dl:dev
#   docker buildx build . -f Dockerfile \
#       --build-arg ABX_DL_IMAGE=archivebox/abx-dl:dev \
#       -t archivebox:multistage

# Image reference for the abx-dl runtime that every ArchiveBox stage builds on.
# Declared before the first FROM so it can be used in a FROM line; override it
# with --build-arg ABX_DL_IMAGE=<image:tag>.
ARG ABX_DL_IMAGE=archivebox/abx-dl:latest

# Used only as a source for the sonic binary (see COPY --from=sonic in the final stage).
FROM archivebox/sonic:1.4.9 AS sonic
# Shared base for both the builder stage and the final runtime image.
FROM ${ABX_DL_IMAGE} AS archivebox-runtime-base

# Platform build args, declared in this stage so that RUN steps and cache-mount
# ids can reference them.
ARG TARGETPLATFORM
ARG TARGETOS
ARG TARGETARCH
ARG TARGETVARIANT

# Timezone/locale plus non-interactive and quiet defaults for apt, Python, pip, and npm.
ENV TZ=UTC \
    LANGUAGE=en_US:en \
    LC_ALL=C.UTF-8 \
    LANG=C.UTF-8 \
    DEBIAN_FRONTEND=noninteractive \
    APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=1 \
    PYTHONIOENCODING=UTF-8 \
    PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    PIP_NO_COMPILE=1 \
    PIP_ONLY_BINARY=aiohttp \
    npm_config_loglevel=error

# Toolchain versions. PYTHON_VERSION selects the interpreter for `uv venv` in the
# builder stage; NODE_VERSION is not referenced by any instruction in this file.
ENV PYTHON_VERSION=3.13 \
    NODE_VERSION=24

# Service account name and the default UID/GID it is given in the final stage.
# IN_DOCKER is presumably read by the application at runtime -- confirm.
ENV ARCHIVEBOX_USER=archivebox \
    DEFAULT_ARCHIVEBOX_UID=911 \
    DEFAULT_ARCHIVEBOX_GID=911 \
    IN_DOCKER=True

# Filesystem layout (code, data, config, abxpkg lib, Playwright browser cache,
# personas) and Chrome launch defaults.
ENV CODE_DIR=/app \
    DATA_DIR=/data \
    CONFIG_DIR=/opt/archivebox \
    ABXPKG_LIB_DIR=/opt/archivebox/lib \
    PLAYWRIGHT_BROWSERS_PATH=/opt/archivebox/lib/playwright/cache \
    PERSONAS_DIR=/data/personas \
    CHROME_HEADLESS=true \
    CHROME_SANDBOX=false \
    CHROME_ISOLATION=crawl \
    CHROME_ARGS_EXTRA='["--disable-gpu","--disable-features=Translate,OptimizationGuideModelDownloading,MediaRouter"]'

# Temp dir, venv interpreter path, and placeholder Google API credentials
# (presumably to keep Chromium from complaining about missing keys -- confirm).
ENV TMP_DIR=/tmp/archivebox \
    PIP_VENV_PYTHON=/venv/bin/python3 \
    GOOGLE_API_KEY=no \
    GOOGLE_DEFAULT_CLIENT_ID=no \
    GOOGLE_DEFAULT_CLIENT_SECRET=no

# Home and XDG directories, abxpkg install settings, and the global TIMEOUT value.
ENV HOME=/home/archivebox \
    XDG_CONFIG_HOME=/home/archivebox/.config \
    XDG_CACHE_HOME=/opt/archivebox/lib/cache \
    ABXPKG_INSTALL_TIMEOUT=600 \
    ABXPKG_POSTINSTALL_SCRIPTS=True \
    ABXPKG_MIN_RELEASE_AGE=0 \
    TIMEOUT=600

# uv settings: the project environment and active virtualenv are both /venv.
# PATH is extended so /venv/bin and /opt/node/bin take precedence.
ENV UV_COMPILE_BYTECODE=false \
    UV_PYTHON_PREFERENCE=managed \
    UV_PYTHON_INSTALL_DIR=/opt/uv/python \
    UV_LINK_MODE=copy \
    UV_PROJECT_ENVIRONMENT=/venv \
    VIRTUAL_ENV=/venv \
    PATH="/venv/bin:/opt/node/bin:$PATH"

# All shell-form RUN steps (and RUN heredocs) execute under bash with
# pipefail, errexit, errtrace, and nounset enabled.
SHELL ["/bin/bash", "-o", "pipefail", "-o", "errexit", "-o", "errtrace", "-o", "nounset", "-c"]
WORKDIR "$CODE_DIR"

# Keep the abx-dl build report under its own name, then begin the ArchiveBox
# build report: platform, start time, kernel, OS release, and the node/uv
# lookups are printed to the build log and appended to /VERSION.txt.
# The script relies on errexit + pipefail from SHELL above: any failing command
# inside the group aborts the step.
RUN <<'EOF'
cp /VERSION.txt /ABX-DL-VERSION.txt
{
    echo "[i] Docker build for ArchiveBox multistage starting..."
    echo "PLATFORM=${TARGETPLATFORM} ARCH=$(uname -m) (${TARGETARCH} ${TARGETVARIANT})"
    echo "BUILD_START_TIME=$(date +"%Y-%m-%d %H:%M:%S %s") TZ=${TZ} LANG=${LANG}"
    uname -a
    sed -n '1,7p' /etc/os-release
    abxpkg load --binproviders=env node
    abxpkg load --binproviders=env uv
} | tee -a /VERSION.txt
EOF

# Builder stage: creates /venv and installs ArchiveBox's Python dependencies.
# The final image starts again from archivebox-runtime-base and copies only
# selected paths out of this stage.
FROM archivebox-runtime-base AS archivebox-builder

WORKDIR "$CODE_DIR"
# Dependency layer. pyproject.toml is bind-mounted for this step only; the rest
# of the source tree is copied in by a later instruction. apt and uv downloads
# are kept in per-architecture BuildKit cache mounts.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-$TARGETARCH$TARGETVARIANT \
    --mount=type=cache,target=/root/.cache/uv,sharing=locked,id=uv-$TARGETARCH$TARGETVARIANT \
    --mount=type=bind,source=pyproject.toml,target=/app/pyproject.toml \
    <<'EOF'
echo "[+] UV Installing ArchiveBox dependencies from pyproject.toml..."
# Keep downloaded packages (the cache mount at /var/cache/apt persists them) and
# never pull in recommended or suggested packages.
echo 'Binary::apt::APT::Keep-Downloaded-Packages "1";' > /etc/apt/apt.conf.d/99keep-cache
echo 'APT::Install-Recommends "0";' > /etc/apt/apt.conf.d/99no-install-recommends
echo 'APT::Install-Suggests "0";' > /etc/apt/apt.conf.d/99no-install-suggests
# Remove the docker-clean apt config if present (presumably it would purge the
# package cache after each install -- confirm against the base image).
rm -f /etc/apt/apt.conf.d/docker-clean
apt-get update -qq
# Compiler toolchain and LDAP/SASL/SSL headers, needed only while building wheels.
apt-get install -qq -y --no-install-recommends \
    build-essential gcc libldap2-dev libsasl2-dev libssl-dev
# Recreate /venv with the requested Python version; the uv commands that follow
# target it through VIRTUAL_ENV / UV_PROJECT_ENVIRONMENT.
/usr/bin/uv venv --clear /venv --python "${PYTHON_VERSION}"
/usr/bin/uv pip install setuptools pip wheel

# Generate a Docker-only copy of pyproject.toml in a scratch project directory;
# the bind-mounted original is only read.
mkdir -p /tmp/archivebox-uv-project
/venv/bin/python - <<'PY'
from pathlib import Path
import json
import re
import urllib.request

source = Path("/app/pyproject.toml")
target = Path("/tmp/archivebox-uv-project/pyproject.toml")
text = source.read_text()
# Restrict the declared uv environments to linux only (drops the darwin entry).
text = text.replace(
    'environments = ["sys_platform == \'darwin\'", "sys_platform == \'linux\'"]',
    'environments = ["sys_platform == \'linux\'"]',
)

# Docker builds need the just-published internal abx wheels immediately, but
# PyPI simple can lag the version JSON endpoints by tens of minutes. Generate a
# Docker-only dependency view from the version JSON so the published package
# metadata stays normal while image builds remain resumable after a release.
for package in ("abxpkg", "abx-plugins", "abx-dl"):
    # Find the quoted `"<package>>=<version>"` requirement; skip packages not listed.
    match = re.search(rf'"{re.escape(package)}>=(?P<version>[^"]+)"', text)
    if not match:
        continue
    version = match.group("version")
    # Fetch the release metadata for exactly that version from the PyPI JSON API.
    with urllib.request.urlopen(f"https://pypi.org/pypi/{package}/{version}/json", timeout=20) as response:
        data = json.load(response)
    # Take the first wheel listed for the release; raises StopIteration if none exists.
    wheel_url = next(url["url"] for url in data["urls"] if url["filename"].endswith(".whl"))
    # Rewrite the first matching requirement into a direct-URL requirement.
    text = re.sub(
        rf'"{re.escape(package)}>=[^"]+"',
        f'"{package} @ {wheel_url}"',
        text,
        count=1,
    )

target.write_text(text)
PY

# Install dependencies only (no dev group, no project, no workspace members);
# the ArchiveBox package itself is installed by a later step.
/usr/bin/uv sync \
    --project /tmp/archivebox-uv-project \
    --refresh \
    --no-dev \
    --inexact \
    --no-install-project \
    --no-install-workspace \
    --no-sources
# Best-effort strip of shared objects in site-packages; errors are ignored.
(find /venv/lib/python3.*/site-packages -type f -name '*.so' -exec strip --strip-unneeded {} + 2>/dev/null || true)
# Drop any uv executables from the venv; /usr/bin/uv is the one used by this file.
rm -f /venv/bin/uv /venv/bin/uvx
# Remove the build-only packages again and clear the apt package lists.
apt-get purge -y build-essential gcc libldap2-dev libsasl2-dev libssl-dev
apt-get autoremove -y
rm -rf /var/lib/apt/lists/*
EOF

# Copy the whole build context into $CODE_DIR, owned by root with mode 755.
COPY --chown=root:root --chmod=755 "." "$CODE_DIR/"
# Install the ArchiveBox package itself (dependencies were installed earlier,
# hence --no-deps) and extend /VERSION.txt.
# COMMIT_HASH is resolved without git: .git/HEAD is read directly; if it holds a
# 40-hex hash that is used, and if it is a "ref: <path>" pointer the hash is read
# from the ref file or, failing that, looked up in packed-refs. The hash is only
# recorded when it is a 40-character hex string; otherwise it is silently omitted.
RUN --mount=type=cache,target=/root/.cache/uv,sharing=locked,id=uv-$TARGETARCH$TARGETVARIANT \
    echo "[*] Installing ArchiveBox Python source code from $CODE_DIR..." \
    && COMMIT_HASH="$( \
        if [[ -f "$CODE_DIR/.git/HEAD" ]]; then \
            HEAD_REF="$(cat "$CODE_DIR/.git/HEAD")"; \
            if [[ "$HEAD_REF" =~ ^[0-9a-fA-F]{40}$ ]]; then \
                echo "$HEAD_REF"; \
            elif [[ "$HEAD_REF" == ref:\ * ]]; then \
                REF_PATH="${HEAD_REF#ref: }"; \
                cat "$CODE_DIR/.git/$REF_PATH" 2>/dev/null || awk -v ref="$REF_PATH" '$2 == ref {print $1}' "$CODE_DIR/.git/packed-refs" 2>/dev/null || true; \
            fi; \
        fi)" \
    && if [[ "$COMMIT_HASH" =~ ^[0-9a-fA-F]{40}$ ]]; then echo "COMMIT_HASH=$COMMIT_HASH" | tee -a /VERSION.txt; fi \
    && /usr/bin/uv pip install --no-deps "$CODE_DIR" \
    && rm -f /venv/bin/uv /venv/bin/uvx \
    && /usr/bin/uv pip show archivebox | tee -a /VERSION.txt

# Final runtime image. It starts again from the shared base, so nothing from the
# builder stage is present unless it is explicitly copied in below.
FROM archivebox-runtime-base

LABEL name="archivebox" \
    maintainer="Nick Sweeting <dockerfile@archivebox.io>" \
    description="All-in-one self-hosted internet archiving solution" \
    homepage="https://github.com/ArchiveBox/ArchiveBox" \
    documentation="https://github.com/ArchiveBox/ArchiveBox/wiki/Docker" \
    org.opencontainers.image.title="ArchiveBox" \
    org.opencontainers.image.vendor="ArchiveBox" \
    org.opencontainers.image.description="All-in-one self-hosted internet archiving solution" \
    org.opencontainers.image.source="https://github.com/ArchiveBox/ArchiveBox" \
    com.docker.image.source.entrypoint="Dockerfile"

# Sonic binary from the pinned sonic image, plus its config from the build context.
COPY --from=sonic /usr/local/bin/sonic /usr/local/bin/sonic
COPY --chown=root:root --chmod=755 "etc/sonic.cfg" /etc/sonic.cfg

# Artifacts produced by the builder stage: the uv-managed Python installs, the
# populated virtualenv, the application source, and the accumulated build report.
COPY --from=archivebox-builder /opt/uv/python /opt/uv/python
COPY --from=archivebox-builder /venv /venv
COPY --from=archivebox-builder /app /app
COPY --from=archivebox-builder /VERSION.txt /VERSION.txt

# Create the unprivileged service account and the directories it owns.
#
# Steps, in order:
#   - write a profile.d snippet that prepends /venv/bin and /opt/node/bin to PATH
#   - symlink the venv entry points (archivebox, daphne, supervisord,
#     supervisorctl) into /usr/local/bin
#   - create the group and user only if they do not already exist
#   - ensure audio/video group membership and force the default UID/GID
#   - record the sonic binary lookup in /VERSION.txt
#   - create and chown the data, temp, config, lib, cache, and home directories
#   - generate /etc/machine-id once at build time (so it is baked into the image)
#   - append a summary of the user and paths to /VERSION.txt
#
# Every "check || create/fix" pair is wrapped in parentheses on purpose. Bash
# gives && and || equal precedence and evaluates left to right, so an ungrouped
# `a && b || c && d` runs the fallback `c` whenever ANY earlier step fails and
# then carries on, masking the failure. Grouping scopes each fallback to its own
# check, so a failing step aborts the build.
RUN echo "[*] Setting up $ARCHIVEBOX_USER user uid=${DEFAULT_ARCHIVEBOX_UID}..." \
    && printf 'export PATH="/venv/bin:/opt/node/bin:$PATH"\n' > /etc/profile.d/archivebox-path.sh \
    && ln -sf /venv/bin/archivebox /usr/local/bin/archivebox \
    && ln -sf /venv/bin/daphne /usr/local/bin/daphne \
    && ln -sf /venv/bin/supervisord /usr/local/bin/supervisord \
    && ln -sf /venv/bin/supervisorctl /usr/local/bin/supervisorctl \
    && (getent group "$ARCHIVEBOX_USER" >/dev/null || groupadd --system "$ARCHIVEBOX_USER") \
    && (id -u "$ARCHIVEBOX_USER" >/dev/null 2>&1 || useradd --system --create-home --gid "$ARCHIVEBOX_USER" --groups audio,video "$ARCHIVEBOX_USER") \
    && usermod --append --groups audio,video "$ARCHIVEBOX_USER" \
    && ([[ "$(id -u "$ARCHIVEBOX_USER")" == "$DEFAULT_ARCHIVEBOX_UID" ]] || usermod -u "$DEFAULT_ARCHIVEBOX_UID" "$ARCHIVEBOX_USER") \
    && ([[ "$(id -g "$ARCHIVEBOX_USER")" == "$DEFAULT_ARCHIVEBOX_GID" ]] || groupmod -g "$DEFAULT_ARCHIVEBOX_GID" "$ARCHIVEBOX_USER") \
    && abxpkg load --binproviders=env sonic | tee -a /VERSION.txt \
    && install -d -o "$DEFAULT_ARCHIVEBOX_UID" -g "$DEFAULT_ARCHIVEBOX_GID" "$DATA_DIR" "$TMP_DIR" "$CONFIG_DIR" "$ABXPKG_LIB_DIR" "$XDG_CACHE_HOME" "$PLAYWRIGHT_BROWSERS_PATH" \
    && install -d -o "$DEFAULT_ARCHIVEBOX_UID" -g "$DEFAULT_ARCHIVEBOX_GID" "/home/$ARCHIVEBOX_USER" \
    && chown "$DEFAULT_ARCHIVEBOX_UID:$DEFAULT_ARCHIVEBOX_GID" "$DATA_DIR" "$TMP_DIR" \
    && chown -R "$DEFAULT_ARCHIVEBOX_UID:$DEFAULT_ARCHIVEBOX_GID" "$ABXPKG_LIB_DIR" \
    && openssl rand -hex 16 > /etc/machine-id \
    && echo -e "\nARCHIVEBOX_USER=$ARCHIVEBOX_USER ARCHIVEBOX_UID=$(id -u "$ARCHIVEBOX_USER") ARCHIVEBOX_GID=$(id -g "$ARCHIVEBOX_USER")" | tee -a /VERSION.txt \
    && echo -e "TMP_DIR=$TMP_DIR\nABXPKG_LIB_DIR=$ABXPKG_LIB_DIR\nPLAYWRIGHT_BROWSERS_PATH=$PLAYWRIGHT_BROWSERS_PATH\nMACHINE_ID=$(cat /etc/machine-id)\n" | tee -a /VERSION.txt

WORKDIR "$DATA_DIR"
# Seed the image with a freshly initialised collection:
#   1. empty $DATA_DIR (top-level entries only; the directory itself is kept)
#   2. run `archivebox init` with HOME pointed at $TMP_DIR
#   3. best-effort chown of the listed paths to the default UID/GID; the chown
#      is not recursive, errors are discarded, and failure is tolerated. The
#      parentheses keep the `|| true` from also swallowing a failure of the
#      preceding init step.
#   4. empty $TMP_DIR again so nothing written during init is left in it
RUN echo "[+] Initializing image collection..." \
    && find "$DATA_DIR" -mindepth 1 -maxdepth 1 -exec rm -rf {} + \
    && HOME="$TMP_DIR" archivebox init \
    && (chown "$DEFAULT_ARCHIVEBOX_UID:$DEFAULT_ARCHIVEBOX_GID" \
        "$DATA_DIR" "$DATA_DIR"/.archivebox_id "$DATA_DIR"/ArchiveBox.conf "$DATA_DIR"/index.sqlite3 \
        "$DATA_DIR"/logs "$DATA_DIR"/logs/* "$DATA_DIR"/sources \
        "$DATA_DIR"/archive "$DATA_DIR"/archive/users "$DATA_DIR"/personas \
        "$DATA_DIR"/tmp "$DATA_DIR"/tmp/* \
        "$CONFIG_DIR" "$CONFIG_DIR"/config.env "$CONFIG_DIR"/derived.env \
        "$TMP_DIR" "$ABXPKG_LIB_DIR" "$XDG_CACHE_HOME" "$PLAYWRIGHT_BROWSERS_PATH" \
        2>/dev/null || true) \
    && find "$TMP_DIR" -mindepth 1 -maxdepth 1 -exec rm -rf {} +

# Let group members write to the temp, abxpkg lib, and Playwright browser
# directories, and make the helper scripts under $CODE_DIR/bin executable.
RUN chmod g+w "$TMP_DIR" "$ABXPKG_LIB_DIR" "$PLAYWRIGHT_BROWSERS_PATH" \
    && chmod +x "$CODE_DIR"/bin/*.sh

# Sanity-check the runtime image, then install plugin dependencies as the
# unprivileged user.
#
#   1. Fail the build if a compiler toolchain binary (gcc, g++, make) is
#      resolvable in the runtime image. The check uses `if ...; then exit 1; fi`
#      in the main shell: an `exit` inside a `( ... )` subshell would only end
#      that subshell, the loop would keep iterating, and the loop's status would
#      be decided by the last tool checked alone.
#   2. Print ownership/mode of the config, lib, and Playwright directories and
#      confirm the service user can write to the config and lib directories.
#   3. Ask abx_dl for the discovered plugins and emit `export <enabled_key>=True`
#      for each plugin whose enabled_key is one of its config properties; the
#      sorted list is appended to /VERSION.txt and the file is sourced so the
#      variables are exported to the commands that follow.
#   4. Run `archivebox install` and `archivebox version` as the service user
#      with HOME on the BuildKit cache mount; the version output is appended to
#      /VERSION.txt.
#   5. Remove root's cache directory and the apt caches/lists from the layer.
RUN --mount=type=cache,target=/tmp/abxpkg-cache,sharing=locked,mode=1777 \
    for forbidden_bin in gcc g++ make; do \
        if abxpkg load --binproviders=env "$forbidden_bin" >/dev/null 2>&1; then \
            echo "Unexpected build tool in runtime: $forbidden_bin" >&2; \
            exit 1; \
        fi; \
    done \
    && stat -c "%U:%G %a %n" "$CONFIG_DIR" "$ABXPKG_LIB_DIR" "$PLAYWRIGHT_BROWSERS_PATH" \
    && setpriv --reuid="$ARCHIVEBOX_USER" --regid="$ARCHIVEBOX_USER" --init-groups test -w "$CONFIG_DIR" \
    && setpriv --reuid="$ARCHIVEBOX_USER" --regid="$ARCHIVEBOX_USER" --init-groups test -w "$ABXPKG_LIB_DIR" \
    && python3 -c 'from abx_dl.models import discover_plugins; [print(f"export {plugin.enabled_key}=True") for plugin in discover_plugins(runtime="archivebox").values() if plugin.enabled_key in plugin.config.properties]' > /tmp/archivebox-enable-plugins.env \
    && sort /tmp/archivebox-enable-plugins.env | tee -a /VERSION.txt \
    && source /tmp/archivebox-enable-plugins.env \
    && setpriv --reuid="$ARCHIVEBOX_USER" --regid="$ARCHIVEBOX_USER" --init-groups env HOME=/tmp/abxpkg-cache ABXPKG_NO_CACHE=True ABXPKG_TMP_CACHE_DIR=/tmp/abxpkg-cache archivebox install \
    && setpriv --reuid="$ARCHIVEBOX_USER" --regid="$ARCHIVEBOX_USER" --init-groups env HOME=/tmp/abxpkg-cache archivebox version 2>&1 | tee -a /VERSION.txt \
    && rm -rf /root/.cache /var/cache/apt/* /var/lib/apt/lists/*

# Close the build report: print the success banner, platform, and end time to
# the build log and append the same lines to /VERSION.txt.
RUN <<'EOF'
{
    echo -e "\n\n[√] Finished ArchiveBox multistage Docker build successfully."
    echo -e "PLATFORM=${TARGETPLATFORM} ARCH=$(uname -m) (${TARGETARCH} ${TARGETVARIANT})"
    echo -e "BUILD_END_TIME=$(date +"%Y-%m-%d %H:%M:%S %s")\n\n"
} | tee -a /VERSION.txt
EOF

# Runtime contract: containers start in the data directory, which is declared
# as a volume only after the build-time `archivebox init` has populated it.
WORKDIR "$DATA_DIR"
VOLUME "$DATA_DIR"
# Port the default CMD binds to.
EXPOSE 8000

# Probe the /health/ endpoint every 30s. curl gives up after 5s (2s to connect)
# and the probe passes only if the response body contains "OK". The container
# is reported unhealthy after 15 consecutive failures.
HEALTHCHECK --interval=30s --timeout=20s --retries=15 \
    CMD curl --fail --silent --show-error --max-time 5 --connect-timeout 2 'http://admin.archivebox.localhost:8000/health/' | grep -q 'OK'

# dumb-init runs the entrypoint script, which receives CMD as its arguments.
ENTRYPOINT ["dumb-init", "--", "/app/bin/docker_entrypoint.sh"]
# Default command: start the ArchiveBox server on all interfaces, port 8000.
CMD ["archivebox", "server", "--init", "0.0.0.0:8000"]
