Files
ArchiveBox/bin/validate_docker_uid_gid.sh
2026-06-14 11:44:38 -07:00

450 lines
16 KiB
Bash
Executable File

#!/usr/bin/env bash
set -Eeuo pipefail
IMAGE="${IMAGE:-archivebox/archivebox:dev}"
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
ENTRYPOINT_PATH="${ENTRYPOINT_PATH:-$REPO_DIR/bin/docker_entrypoint.sh}"
RUN_ID="${RUN_ID:-$(date +%Y%m%d-%H%M%S)-$$}"
VALIDATION_ROOT="${VALIDATION_ROOT:-$REPO_DIR/tmp/docker-uid-gid-validation/$RUN_ID}"
KEEP_VALIDATION_ROOT="${KEEP_VALIDATION_ROOT:-0}"
REMOTE_HOST=""
LOCAL_ONLY=0
while [[ $# -gt 0 ]]; do
case "$1" in
--remote)
REMOTE_HOST="$2"
shift 2
;;
--local-only)
LOCAL_ONLY=1
shift
;;
--entrypoint)
ENTRYPOINT_PATH="$2"
shift 2
;;
--workdir)
VALIDATION_ROOT="$2"
shift 2
;;
*)
echo "Usage: $0 [--remote HOST] [--local-only] [--entrypoint PATH] [--workdir PATH]" >&2
exit 2
;;
esac
done
if [[ -n "$REMOTE_HOST" && "$LOCAL_ONLY" != "1" ]]; then
remote_dir="/tmp/archivebox-uid-gid-validation-$RUN_ID"
ssh "$REMOTE_HOST" "mkdir -p '$remote_dir'"
scp "$0" "$ENTRYPOINT_PATH" "$REMOTE_HOST:$remote_dir/" >/dev/null
ssh "$REMOTE_HOST" "cd '$remote_dir' && IMAGE='$IMAGE' ENTRYPOINT_PATH='$remote_dir/$(basename "$ENTRYPOINT_PATH")' bash './$(basename "$0")' --local-only --entrypoint '$remote_dir/$(basename "$ENTRYPOINT_PATH")' --workdir '$remote_dir/work'"
exit $?
fi
DOCKER_PLATFORM="${DOCKER_PLATFORM:-}"
if [[ -z "$DOCKER_PLATFORM" ]]; then
case "$(uname -m)" in
arm64|aarch64) DOCKER_PLATFORM="linux/amd64" ;;
esac
fi
docker_base=(docker run --rm)
if [[ -n "$DOCKER_PLATFORM" ]]; then
docker_base+=(--platform "$DOCKER_PLATFORM")
fi
mkdir -p "$VALIDATION_ROOT"
total=0
passed=0
failed=0
log() {
printf '%s\n' "$*"
}
safe_name() {
printf '%s' "$1" | tr -cs 'A-Za-z0-9_.-' '-'
}
docker_setup() {
local case_dir="$1"
local setup_script="$2"
mkdir -p "$case_dir"
"${docker_base[@]}" \
-v "$case_dir:/case" \
--entrypoint /bin/bash \
"$IMAGE" \
-lc "set -Eeuo pipefail
rm -rf /case/data /case/lib /case/browsers
mkdir -p /case/data /case/lib /case/browsers
chown 0:0 /case/data /case/lib /case/browsers
chmod 755 /case/data /case/lib /case/browsers
$setup_script"
}
default_cmd='printf "ABX_UID=%s\nABX_GID=%s\nABX_USER=%s\nABX_GROUPS=%s\n" "$(id -u)" "$(id -g)" "$(whoami 2>/dev/null || true)" "$(id -Gn)"; touch /data/logs/probe /data/archive/probe "$ABXPKG_LIB_DIR/probe" "$PERSONAS_DIR/Default/chrome_profile/probe"; stat -c "ABX_STAT %u:%g:%a %n" /data /data/logs /data/archive "$ABXPKG_LIB_DIR" "$PERSONAS_DIR" "$PERSONAS_DIR/Default" "$PERSONAS_DIR/Default/chrome_profile"; echo ABX_PERSONA_PROFILE_OK; echo ABX_OK'
version_cmd='printf "ABX_UID=%s\nABX_GID=%s\nABX_USER=%s\nABX_GROUPS=%s\n" "$(id -u)" "$(id -g)" "$(whoami 2>/dev/null || true)" "$(id -Gn)"; archivebox version >/tmp/archivebox-version.out; tail -n 12 /tmp/archivebox-version.out; echo ABX_OK'
full_flow_cmd='set -Eeuo pipefail
printf "ABX_UID=%s\nABX_GID=%s\nABX_USER=%s\nABX_GROUPS=%s\n" "$(id -u)" "$(id -g)" "$(whoami 2>/dev/null || true)" "$(id -Gn)"
id -Gn | grep -qw audio
id -Gn | grep -qw video
mkdir -p "$PERSONAS_DIR/Default/chrome_profile"
touch "$PERSONAS_DIR/Default/chrome_profile/probe"
rm -f "$PERSONAS_DIR/Default/chrome_profile/probe"
archivebox init
archivebox install 2>&1 | tee /tmp/archivebox-install.log
if grep -E "(/[[:alnum:]_.-]+/)?pip install|npm install|uv pip install" /tmp/archivebox-install.log; then
echo "ABX_UNEXPECTED_RUNTIME_INSTALL"
exit 1
fi
archivebox version 2>&1 | tee /tmp/archivebox-version.log
grep -Eq "/opt/archivebox/lib/(uv/venv/bin|env/bin)/trafilatura" /tmp/archivebox-version.log
grep -Eq "/opt/archivebox/lib/(pnpm/packages/defuddle/node_modules/.bin|env/bin)/defuddle" /tmp/archivebox-version.log
grep -Eq "/opt/archivebox/lib/env/bin/sonic" /tmp/archivebox-version.log
archivebox add --depth=0 https://example.com/ 2>&1 | tee /tmp/archivebox-add.log
archivebox update --index-only 2>&1 | tee /tmp/archivebox-update.log
snapshot_dir="$(find /data/archive/users/system/snapshots -mindepth 3 -maxdepth 3 -type d | head -n 1)"
test -n "$snapshot_dir"
test -s "$snapshot_dir/index.html"
test -s "$snapshot_dir/wget/example.com/index.html"
test -s "$snapshot_dir/dom/output.html"
test -s "$snapshot_dir/screenshot/screenshot.png"
test -s "$snapshot_dir/pdf/output.pdf"
test -s "$snapshot_dir/singlefile/singlefile.html"
test -s "$snapshot_dir/headers/headers.json"
test -s "$snapshot_dir/readability/content.txt"
test -s "$snapshot_dir/trafilatura/content.txt"
test -s "$snapshot_dir/defuddle/content.txt"
test -s "$snapshot_dir/liteparse/content.txt"
test -s "$snapshot_dir/responses/index.jsonl"
test -s "$snapshot_dir/search_backend_sonic/on_Snapshot__91_index_sonic."*.sh
test -s "$snapshot_dir/search_backend_sqlite/on_Snapshot__90_index_sqlite."*.sh
grep -R "Example Domain" \
"$snapshot_dir/wget/example.com/index.html" \
"$snapshot_dir/dom/output.html" \
"$snapshot_dir/readability/content.txt" \
"$snapshot_dir/trafilatura/content.txt" \
"$snapshot_dir/defuddle/content.txt" \
"$snapshot_dir/liteparse/content.txt"
if grep -R "Permission denied\\|Operation not permitted" /tmp/archivebox-add.log /tmp/archivebox-update.log /data/logs 2>/dev/null; then
echo "ABX_PERMISSION_ERROR_IN_FULL_FLOW"
exit 1
fi
find "$snapshot_dir" -maxdepth 2 -type f | sort | sed "s#^#ABX_OUTPUT #"
echo ABX_OK'
run_case() {
local name="$1"
local setup_script="$2"
local env_string="$3"
local user_spec="$4"
local expected_status="$5"
local expected_uid="$6"
local expected_gid="$7"
local command="${8:-$default_cmd}"
local post_assert="${9:-}"
total=$((total + 1))
local slug case_dir log_file status
slug="$(safe_name "$name")"
case_dir="$VALIDATION_ROOT/$slug"
log_file="$case_dir/output.log"
docker_setup "$case_dir" "$setup_script"
local run_args=("${docker_base[@]}")
if [[ "$user_spec" != "-" ]]; then
run_args+=(--user "$user_spec")
fi
run_args+=(
-e DATA_DIR=/data
-e ABXPKG_LIB_DIR=/libdir
-e PLAYWRIGHT_BROWSERS_PATH=/browsers
)
if [[ -n "$env_string" && "$env_string" != "-" ]]; then
local env_parts=()
read -r -a env_parts <<< "$env_string"
local env_pair
for env_pair in "${env_parts[@]}"; do
run_args+=(-e "$env_pair")
done
fi
run_args+=(
-v "$ENTRYPOINT_PATH:/app/bin/docker_entrypoint.sh:ro"
-v "$case_dir/data:/data"
-v "$case_dir/lib:/libdir"
-v "$case_dir/browsers:/browsers"
--entrypoint /app/bin/docker_entrypoint.sh
"$IMAGE"
sh -c "$command"
)
set +e
"${run_args[@]}" >"$log_file" 2>&1
status=$?
set -e
local ok=1
if [[ "$expected_status" == "pass" && "$status" != "0" ]]; then
ok=0
elif [[ "$expected_status" == "fail" && "$status" == "0" ]]; then
ok=0
fi
if [[ "$expected_status" == "pass" ]]; then
if [[ -n "$expected_uid" ]] && ! grep -q "^ABX_UID=$expected_uid$" "$log_file"; then
ok=0
fi
if [[ -n "$expected_gid" ]] && ! grep -q "^ABX_GID=$expected_gid$" "$log_file"; then
ok=0
fi
if ! grep -q '^ABX_OK$' "$log_file"; then
ok=0
fi
if [[ "$user_spec" == "-" ]]; then
grep '^ABX_GROUPS=' "$log_file" | grep -qw audio || ok=0
grep '^ABX_GROUPS=' "$log_file" | grep -qw video || ok=0
grep -q '^ABX_PERSONA_PROFILE_OK$' "$log_file" || ok=0
fi
fi
if [[ "$post_assert" == "nested-root-stays" ]]; then
local nested_stat
nested_stat="$("${docker_base[@]}" -v "$case_dir/data:/data" --entrypoint /bin/bash "$IMAGE" -lc "stat -c '%u:%g' /data/archive/existing/file" 2>/dev/null || true)"
[[ "$nested_stat" == "0:0" ]] || ok=0
elif [[ "$post_assert" == users-dir-repaired ]]; then
local users_stat
users_stat="$("${docker_base[@]}" -v "$case_dir/data:/data" --entrypoint /bin/bash "$IMAGE" -lc "stat -c '%u:%g' /data/users" 2>/dev/null || true)"
[[ "$users_stat" == "$expected_uid:$expected_gid" ]] || ok=0
elif [[ "$post_assert" == config-files-repaired ]]; then
local config_stat index_stat
config_stat="$("${docker_base[@]}" -v "$case_dir/data:/data" --entrypoint /bin/bash "$IMAGE" -lc "stat -c '%u:%g' /data/ArchiveBox.conf" 2>/dev/null || true)"
index_stat="$("${docker_base[@]}" -v "$case_dir/data:/data" --entrypoint /bin/bash "$IMAGE" -lc "stat -c '%u:%g' /data/index.sqlite3" 2>/dev/null || true)"
[[ "$config_stat" == "$expected_uid:$expected_gid" && "$index_stat" == "$expected_uid:$expected_gid" ]] || ok=0
fi
if [[ "$ok" == "1" ]]; then
passed=$((passed + 1))
log "PASS $name"
else
failed=$((failed + 1))
log "FAIL $name (status=$status expected=$expected_status log=$log_file)"
sed -n '1,160p' "$log_file"
fi
}
run_readonly_case() {
local name="$1"
local setup_script="$2"
local env_string="$3"
local user_spec="$4"
total=$((total + 1))
local slug case_dir log_file status ok
slug="$(safe_name "$name")"
case_dir="$VALIDATION_ROOT/$slug"
log_file="$case_dir/output.log"
docker_setup "$case_dir" "$setup_script"
local run_args=("${docker_base[@]}")
if [[ "$user_spec" != "-" ]]; then
run_args+=(--user "$user_spec")
fi
run_args+=(
-e DATA_DIR=/data
-e ABXPKG_LIB_DIR=/libdir
-e PLAYWRIGHT_BROWSERS_PATH=/browsers
)
if [[ -n "$env_string" && "$env_string" != "-" ]]; then
local env_parts=()
read -r -a env_parts <<< "$env_string"
local env_pair
for env_pair in "${env_parts[@]}"; do
run_args+=(-e "$env_pair")
done
fi
run_args+=(
-v "$ENTRYPOINT_PATH:/app/bin/docker_entrypoint.sh:ro"
-v "$case_dir/data:/data:ro"
-v "$case_dir/lib:/libdir:ro"
-v "$case_dir/browsers:/browsers:ro"
--entrypoint /app/bin/docker_entrypoint.sh
"$IMAGE"
sh -c "$default_cmd"
)
set +e
"${run_args[@]}" >"$log_file" 2>&1
status=$?
set -e
ok=0
if [[ "$status" != "0" ]] && grep -q "cannot write to /data" "$log_file"; then
ok=1
fi
if [[ "$ok" == "1" ]]; then
passed=$((passed + 1))
log "PASS $name"
else
failed=$((failed + 1))
log "FAIL $name (status=$status expected=readonly failure log=$log_file)"
sed -n '1,160p' "$log_file"
fi
}
run_full_flow_case() {
local name="$1"
local setup_script="$2"
local env_string="$3"
local user_spec="$4"
local expected_uid="$5"
local expected_gid="$6"
total=$((total + 1))
local slug case_dir log_file status ok
slug="$(safe_name "$name")"
case_dir="$VALIDATION_ROOT/$slug"
log_file="$case_dir/output.log"
docker_setup "$case_dir" "$setup_script"
local run_args=("${docker_base[@]}")
if [[ "$user_spec" != "-" ]]; then
run_args+=(--user "$user_spec")
fi
run_args+=(
-e DATA_DIR=/data
)
if [[ -n "$env_string" && "$env_string" != "-" ]]; then
local env_parts=()
read -r -a env_parts <<< "$env_string"
local env_pair
for env_pair in "${env_parts[@]}"; do
run_args+=(-e "$env_pair")
done
fi
run_args+=(
-v "$ENTRYPOINT_PATH:/app/bin/docker_entrypoint.sh:ro"
-v "$case_dir/data:/data"
--entrypoint /app/bin/docker_entrypoint.sh
"$IMAGE"
bash -lc "$full_flow_cmd"
)
set +e
"${run_args[@]}" >"$log_file" 2>&1
status=$?
set -e
ok=1
[[ "$status" == "0" ]] || ok=0
grep -q "^ABX_UID=$expected_uid$" "$log_file" || ok=0
grep -q "^ABX_GID=$expected_gid$" "$log_file" || ok=0
grep '^ABX_GROUPS=' "$log_file" | grep -qw audio || ok=0
grep '^ABX_GROUPS=' "$log_file" | grep -qw video || ok=0
grep -q '^ABX_OK$' "$log_file" || ok=0
grep -q 'total urls snapshotted: 1' "$log_file" || ok=0
grep -q 'Search Reindex Complete' "$log_file" || ok=0
grep -q 'ABX_OUTPUT .*/wget/example.com/index.html' "$log_file" || ok=0
grep -q 'ABX_OUTPUT .*/screenshot/screenshot.png' "$log_file" || ok=0
grep -q 'ABX_OUTPUT .*/trafilatura/content.txt' "$log_file" || ok=0
if [[ "$ok" == "1" ]]; then
passed=$((passed + 1))
log "PASS $name"
else
failed=$((failed + 1))
log "FAIL $name (status=$status expected=full-flow success log=$log_file)"
sed -n '1,220p' "$log_file"
fi
}
run_mount_case() {
local fs_name="$1"
local mount_dir="$2"
if [[ -z "$mount_dir" || ! -d "$mount_dir" ]]; then
log "SKIP $fs_name mount case: mount dir not provided"
return
fi
if [[ ! -w "$mount_dir" ]]; then
log "SKIP $fs_name mount case: $mount_dir is not writable by host user"
return
fi
local previous_root case_root
previous_root="$VALIDATION_ROOT"
case_root="$mount_dir/archivebox-uidgid-validation-$RUN_ID"
mkdir -p "$case_root"
VALIDATION_ROOT="$case_root"
run_case "$fs_name writable forced-owner style mount" ":" "-" "-" pass 911 911 "$default_cmd" ""
VALIDATION_ROOT="$previous_root"
}
log "Running UID/GID validation on $(hostname) using image=$IMAGE entrypoint=$ENTRYPOINT_PATH root=$VALIDATION_ROOT platform=${DOCKER_PLATFORM:-native}"
run_case "root-owned empty data auto-detect default" \
"chown 0:0 /case/data && chmod 755 /case/data" \
"-" "-" pass 911 911
run_case "501-owned data auto-detected" \
"chown 501:20 /case/data && chmod 755 /case/data" \
"-" "-" pass 501 20
run_case "non-root data with root-owned config files repaired" \
"chown 501:20 /case/data && chmod 755 /case/data && touch /case/data/index.sqlite3 /case/data/ArchiveBox.conf && mkdir -p /case/data/archive/users && chown 0:0 /case/data/index.sqlite3 /case/data/ArchiveBox.conf /case/data/archive/users" \
"-" "-" pass 501 20 "$default_cmd" config-files-repaired
run_case "911-owned data auto-detected" \
"chown 911:911 /case/data && chmod 755 /case/data" \
"-" "-" pass 911 911
run_case "root-owned data falls back to default archivebox user" \
"chown 0:0 /case/data && chmod 755 /case/data" \
"-" "-" pass 911 911
run_case "nested root-owned archive content is not recursively chowned" \
"chown 0:0 /case/data && chmod 755 /case/data && mkdir -p /case/data/archive/existing && touch /case/data/archive/existing/file && chown -R 0:0 /case/data/archive/existing" \
"-" "-" pass 911 911 "$default_cmd" nested-root-stays
run_case "root start fixes read-only top-level data when chmod works" \
"chown 0:0 /case/data && chmod 555 /case/data" \
"-" "-" pass 911 911
run_case "legacy data users dir repaired when present" \
"chown 911:911 /case/data && chmod 755 /case/data && mkdir -p /case/data/users && chown 0:0 /case/data/users" \
"-" "-" pass 911 911 "$default_cmd" users-dir-repaired
run_case "archive dir root-owned inside 911 data repaired shallowly" \
"chown 911:911 /case/data && chmod 755 /case/data && mkdir -p /case/data/archive && chown 0:0 /case/data/archive" \
"-" "-" pass 911 911
run_case "logs dir root-owned mode 000 repaired shallowly" \
"chown 911:911 /case/data && chmod 755 /case/data && mkdir -p /case/data/logs && chown 0:0 /case/data/logs && chmod 000 /case/data/logs" \
"-" "-" pass 911 911
run_case "1001-owned data auto-detected" \
"chown 1001:1001 /case/data && chmod 755 /case/data" \
"-" "-" pass 1001 1001
run_case "root-owned ArchiveBox.conf only is repaired" \
"chown 911:911 /case/data && chmod 755 /case/data && touch /case/data/ArchiveBox.conf /case/data/index.sqlite3 && chown 0:0 /case/data/ArchiveBox.conf /case/data/index.sqlite3" \
"-" "-" pass 911 911 "$default_cmd" config-files-repaired
run_mount_case "NFS" "${NFS_TEST_DIR:-}"
run_mount_case "SMB" "${SMB_TEST_DIR:-}"
log "SUMMARY passed=$passed failed=$failed total=$total"
if [[ "$KEEP_VALIDATION_ROOT" != "1" ]]; then
rm -rf "$VALIDATION_ROOT"
fi
[[ "$failed" == "0" ]]