From dd84532de95bc6eb0a581621a9afb1b298f47bb8 Mon Sep 17 00:00:00 2001
From: Adam Lamers
Date: Wed, 29 Apr 2026 16:21:05 -0400
Subject: [PATCH] optionally use find binary if gnu and available

---
 .github/workflows/ci.yml              |  10 +-
 GEMINI.md                             |   6 +-
 backend/app/api/inventory.py          |  11 +-
 backend/app/api/system.py             |  35 +++
 backend/app/providers/base.py         |   2 +-
 backend/app/providers/cloud.py        |   2 +-
 backend/app/providers/mock.py         |   2 +-
 backend/app/services/archiver.py      |  97 +++----
 backend/app/services/scanner.py       | 240 +++++++++++++++---
 backend/tests/test_service_scanner.py |   3 +
 frontend/playwright.config.ts         |   7 +
 .../file-browser/FileBrowser.svelte   |  33 ++-
 frontend/tests/e2e.test.ts            | 170 +++++++++++--
 justfile                              |  10 +-
 14 files changed, 493 insertions(+), 135 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index ebcb4f7..a3d2596 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -87,21 +87,13 @@ jobs:
           npm ci
           npx playwright install --with-deps chromium
 
-      - name: Start Backend Test Server
-        run: |
-          cd backend
-          uv sync --dev
-          DATABASE_URL="sqlite:///test.db" TAPEHOARD_TEST_MODE="true" uv run alembic upgrade head
-          DATABASE_URL="sqlite:///test.db" TAPEHOARD_TEST_MODE="true" uv run uvicorn app.main:app --host 0.0.0.0 --port 8001 &
-          sleep 5 # Wait for server to start
-
       - name: Run Playwright Tests
         run: |
           cd frontend
           npx playwright test
 
       - name: Upload Playwright Report
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: playwright-report
diff --git a/GEMINI.md b/GEMINI.md
index e526c95..d6f2eba 100644
--- a/GEMINI.md
+++ b/GEMINI.md
@@ -76,8 +76,10 @@ This document (`GEMINI.md`) contains critical, contextual information about the
 ### Deployment & Testing
 * **Temporal Standard:** Backend uses **UTC**. Frontend uses `parseUTCDate` to convert to browser **Local Time**.
 * **Unsaved Changes Guard:** UI must use `beforeNavigate` and `beforeunload` listeners to warn users if they leave the Settings or Media registration forms with uncommitted changes.
-* **Testing Protocol:** Use **Alembic-driven file-based SQLite** for tests to ensure 100% schema fidelity (including FTS5 and triggers) and reliable cross-thread data visibility. Atomic truncation must occur between tests.
-* `just pytest` to run backend tests
+* **Backend Testing:** Use **Alembic-driven file-based SQLite** for tests to ensure 100% schema fidelity (including FTS5 and triggers) and reliable cross-thread data visibility. Atomic truncation must occur between tests. Run `just pytest` to execute backend tests.
+* **End-to-End (E2E) Testing:** Playwright is used for E2E testing (`frontend/tests/`).
+  * **Mock Hardware:** To simulate LTO drives in CI, the backend supports a `TAPEHOARD_TEST_MODE=true` flag. This registers a `MockLTOProvider` that uses local directories instead of physical SCSI devices.
+  * **Running E2E:** Run `just playwright` to execute the Playwright test suite; its `webServer` config automatically starts the mock backend (port 8001) and the Vite dev server.
 
 ### UI & UX Philosophy
 * **Direct Terminology:** Use technical terms like "Backup Manager", "System Status", "Archive Index". Avoid marketing fluff.
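The two API hunks that follow (inventory.py and system.py) apply an identical de-duplication pass before returning results. As a standalone sketch of that pattern (the `dedupe_by_path` helper name is hypothetical; the patch inlines the loop in each endpoint):

```python
from typing import Any, Dict, List


def dedupe_by_path(results: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Keep only the first occurrence of each path, preserving order.

    Svelte keyed {#each} blocks require unique keys, so duplicate paths
    returned by the API would crash the frontend file browser.
    """
    seen: set[str] = set()
    deduped: List[Dict[str, Any]] = []
    for item in results:
        if item["path"] not in seen:
            seen.add(item["path"])
            deduped.append(item)
    return deduped
```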
diff --git a/backend/app/api/inventory.py b/backend/app/api/inventory.py
index b29717d..c2370e7 100644
--- a/backend/app/api/inventory.py
+++ b/backend/app/api/inventory.py
@@ -68,7 +68,7 @@ def list_storage_providers():
     if os.environ.get("TAPEHOARD_TEST_MODE") == "true":
         from app.providers.mock import MockLTOProvider
 
-        providers.append(MockLTOProvider)
+        providers.append(MockLTOProvider)  # ty: ignore[invalid-argument-type]
 
     return [
         StorageProviderSchema(
@@ -756,6 +756,15 @@ def browse_archive_index(path: str = "ROOT", db_session: Session = Depends(get_d
         }
     )
 
+    # Deduplicate by path to prevent frontend keyed each block errors
+    seen_paths: set[str] = set()
+    deduped_results: list[dict] = []
+    for r in results:
+        if r["path"] not in seen_paths:
+            seen_paths.add(r["path"])
+            deduped_results.append(r)
+    results = deduped_results
+
     return results
 
 
diff --git a/backend/app/api/system.py b/backend/app/api/system.py
index 8043764..eccd989 100644
--- a/backend/app/api/system.py
+++ b/backend/app/api/system.py
@@ -160,6 +160,32 @@ def get_ignored_status(
 
 # --- Endpoints ---
 
+@router.post("/test/reset")
+def reset_test_environment(db_session: Session = Depends(get_db)):
+    """Wipes the database and resets state for E2E testing."""
+    import os
+
+    if os.environ.get("TAPEHOARD_TEST_MODE") != "true":
+        raise HTTPException(status_code=403, detail="Reset only allowed in test mode")
+
+    # Wipe tables
+    db_session.query(models.FileVersion).delete()
+    db_session.query(models.RestoreCart).delete()
+    db_session.query(models.Job).delete()
+    db_session.query(models.TrackedSource).delete()
+    db_session.query(models.FilesystemState).delete()
+    db_session.query(models.StorageMedia).delete()
+    # SystemSettings are wiped as well so each E2E run starts from defaults
+    db_session.query(models.SystemSetting).delete()
+
+    db_session.commit()
+
+    # Mock hardware dirs are intentionally left untouched here;
+    # the E2E suite re-creates them in its beforeAll hook
+
+    return {"message": "Test environment reset"}
+
+
 @router.get("/dashboard/stats", response_model=DashboardStatsSchema)
 def get_dashboard_stats(db_session: Session = Depends(get_db)):
     """Computes high-level system statistics for the overview dashboard."""
@@ -439,6 +465,15 @@ def browse_system_path(
     except PermissionError:
         raise HTTPException(status_code=403, detail="Permission denied")
 
+    # Deduplicate by path to prevent frontend keyed each block errors
+    seen_paths: set[str] = set()
+    deduped_results: list[FileItemSchema] = []
+    for r in results:
+        if r.path not in seen_paths:
+            seen_paths.add(r.path)
+            deduped_results.append(r)
+    results = deduped_results
+
     results.sort(key=lambda x: (x.type != "directory", x.name.lower()))
     return results
 
diff --git a/backend/app/providers/base.py b/backend/app/providers/base.py
index ee008e1..e7f906b 100644
--- a/backend/app/providers/base.py
+++ b/backend/app/providers/base.py
@@ -47,7 +47,7 @@ class AbstractStorageProvider(ABC):
         return {"status": "HEALTHY", "alerts": []}
 
     @abstractmethod
-    def identify_media(self) -> Optional[str]:
+    def identify_media(self, allow_intrusive=True) -> Optional[str]:
         """
         Attempts to read the identifier (barcode/UUID) from the currently inserted media.
         Returns None if no media is inserted or it's unidentifiable.
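The abstract `identify_media` signature above, and the cloud and mock overrides below, all gain the same `allow_intrusive=True` keyword. No caller appears in this patch, so the intended semantics are an assumption; a minimal sketch of how a concrete provider might honor the flag (`TapeProvider`, its cache, and `_read_mam_barcode` are all hypothetical):

```python
from typing import Optional


class TapeProvider:
    """Hypothetical provider illustrating an assumed use of allow_intrusive."""

    def __init__(self) -> None:
        self._cached_id: Optional[str] = None

    def identify_media(self, allow_intrusive: bool = True) -> Optional[str]:
        # Cheap path: reuse the identifier from a previous successful read.
        if self._cached_id is not None:
            return self._cached_id
        if not allow_intrusive:
            # Assumed semantics: a passive status poll must not seek the
            # tape or read the MAM chip, so report "unknown" instead.
            return None
        self._cached_id = self._read_mam_barcode()  # hypothetical SCSI read
        return self._cached_id

    def _read_mam_barcode(self) -> Optional[str]:
        raise NotImplementedError  # a real provider would issue READ ATTRIBUTE here
```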
diff --git a/backend/app/providers/cloud.py b/backend/app/providers/cloud.py
index a58038e..e207c88 100644
--- a/backend/app/providers/cloud.py
+++ b/backend/app/providers/cloud.py
@@ -128,7 +128,7 @@ class CloudStorageProvider(AbstractStorageProvider):
         except Exception:
             return False
 
-    def identify_media(self) -> Optional[str]:
+    def identify_media(self, allow_intrusive=True) -> Optional[str]:
         try:
             response = self.s3.get_object(Bucket=self.bucket_name, Key=".tapehoard_id")
             return response["Body"].read().decode("utf-8").strip()
diff --git a/backend/app/providers/mock.py b/backend/app/providers/mock.py
index b7bbac2..7c3e459 100644
--- a/backend/app/providers/mock.py
+++ b/backend/app/providers/mock.py
@@ -47,7 +47,7 @@ class MockLTOProvider(AbstractStorageProvider):
             return True
         return False
 
-    def identify_media(self) -> Optional[str]:
+    def identify_media(self, allow_intrusive=True) -> Optional[str]:
         if not os.path.exists(self.mam_path):
             return None
         try:
diff --git a/backend/app/services/archiver.py b/backend/app/services/archiver.py
index 81f74e8..79bed08 100644
--- a/backend/app/services/archiver.py
+++ b/backend/app/services/archiver.py
@@ -2,6 +2,7 @@ import json
 import os
 import shutil
 import subprocess
+import sys
 import tarfile
 import time
 import uuid
@@ -104,7 +105,9 @@ class ArchiverService:
         if os.environ.get("TAPEHOARD_TEST_MODE") == "true":
             from app.providers.mock import MockLTOProvider
 
-            provider_map[MockLTOProvider.provider_id] = MockLTOProvider
+            provider_map[MockLTOProvider.provider_id] = (
+                MockLTOProvider  # ty: ignore[invalid-assignment]
+            )
 
         provider_cls = provider_map.get(media_record.media_type)
         if not provider_cls:
@@ -447,51 +450,59 @@ class ArchiverService:
             # Sequential Media (Tape): Hybrid Tar Generation
             has_splits = any(item["is_split"] for item in remaining_to_write)
 
-            if not has_splits and shutil.which("tar"):
-                # PERFORMANCE PATH: Use system tar binary for whole files
-                # Generate a null-terminated file list to handle special characters safely
-                file_list_path = staging_full_path + ".list"
-                with open(file_list_path, "w") as f_list:
-                    for item in remaining_to_write:
-                        # Write absolute path to list
-                        f_list.write(item["file_state"].file_path + "\0")
+            if not has_splits:
+                # PERFORMANCE PATH: Use GNU tar binary for whole files
+                # Prefer gtar on Darwin (BSD tar lacks GNU flags like --absolute-names)
+                tar_binary = None
+                if sys.platform == "darwin":
+                    tar_binary = shutil.which("gtar")
+                if tar_binary is None:
+                    tar_binary = shutil.which("tar")
 
-                try:
-                    # Use -C / to handle absolute paths, --null and -T for the list
-                    # --no-recursion since we've already resolved the file list
-                    cmd = [
-                        "tar",
-                        "-cf",
-                        staging_full_path,
-                        "--null",
-                        "-T",
-                        file_list_path,
-                        "--no-recursion",
-                        "--absolute-names",
-                    ]
-                    logger.debug(f"RUNNING BINARY TAR: {' '.join(cmd)}")
-                    subprocess.run(cmd, check=True, capture_output=True)
+                if tar_binary:
+                    # Generate a null-terminated file list to handle special characters safely
+                    file_list_path = staging_full_path + ".list"
+                    with open(file_list_path, "w") as f_list:
+                        for item in remaining_to_write:
+                            # Write absolute path to list
+                            f_list.write(item["file_state"].file_path + "\0")
 
-                    # Update progress to 100% for this chunk
-                    processed_bytes += sum(
-                        i["offset_end"] - i["offset_start"]
-                        for i in remaining_to_write
-                    )
-                    JobManager.update_job(
-                        job_id,
-                        15.0 + (70.0 * (processed_bytes / safe_divisor)),
-                        f"Archived chunk {chunk_index+1} via binary tar",
-                    )
-                except Exception as e:
-                    logger.error(
-                        f"Binary tar failed, falling back to Python: {e}"
-                    )
-                    has_splits = True  # Trigger fallback
-                finally:
-                    if os.path.exists(file_list_path):
-                        os.remove(file_list_path)
+                    try:
+                        # --null must come before -T; --no-recursion and --absolute-names
+                        # must come before positional/non-option arguments
+                        cmd = [
+                            tar_binary,
+                            "-cf",
+                            staging_full_path,
+                            "--null",
+                            "--no-recursion",
+                            "--absolute-names",
+                            "-T",
+                            file_list_path,
+                        ]
+                        logger.debug(f"RUNNING BINARY TAR: {' '.join(cmd)}")
+                        subprocess.run(cmd, check=True, capture_output=True)
 
-            if has_splits or not shutil.which("tar"):
+                        # Update progress to 100% for this chunk
+                        processed_bytes += sum(
+                            i["offset_end"] - i["offset_start"]
+                            for i in remaining_to_write
+                        )
+                        JobManager.update_job(
+                            job_id,
+                            15.0 + (70.0 * (processed_bytes / safe_divisor)),
+                            f"Archived chunk {chunk_index+1} via binary tar",
+                        )
+                    except Exception as e:
+                        logger.error(
+                            f"Binary tar failed, falling back to Python: {e}"
+                        )
+                        has_splits = True  # Trigger fallback
+                    finally:
+                        if os.path.exists(file_list_path):
+                            os.remove(file_list_path)
+
+            if has_splits or not tar_binary:
                 # COMPATIBILITY PATH: Pure Python for fragments or if tar is missing
                 with tarfile.open(staging_full_path, "w") as tar_bundle:
                     for item in remaining_to_write:
diff --git a/backend/app/services/scanner.py b/backend/app/services/scanner.py
index 37a71ca..ee9d646 100644
--- a/backend/app/services/scanner.py
+++ b/backend/app/services/scanner.py
@@ -1,10 +1,12 @@
 import concurrent.futures
 import hashlib
 import os
+import shutil
+import subprocess
 import threading
 import time
 from datetime import datetime, timezone
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Tuple
 
 import psutil
 from loguru import logger
@@ -14,6 +16,149 @@ from sqlalchemy.orm.exc import StaleDataError
 from app.db import models
 from app.db.database import SessionLocal
 
+# Fast file discovery via `find -printf` (GNU find or compatible).
+# Detected once at import time; falls back to os.walk if unavailable.
+_FAST_FIND_BINARY: Optional[str] = None
+
+
+def _detect_fast_find() -> Optional[str]:
+    """Check if a `find` binary with `-printf` support is available.
+
+    Tries `gfind` (GNU find via Homebrew on macOS) first, then `find`.
+    Returns the binary path if `-printf` works, otherwise ``None``.
+    """
+    for candidate in ("gfind", "find"):
+        binary = shutil.which(candidate)
+        if binary is None:
+            continue
+        try:
+            result = subprocess.run(
+                [binary, "/tmp", "-maxdepth", "0", "-printf", "%f\n"],
+                capture_output=True,
+                timeout=5,
+            )
+            if result.returncode == 0 and result.stdout.strip() == b"tmp":
+                return binary
+        except Exception:
+            continue
+    return None
+
+
+def _init_fast_find() -> Optional[str]:
+    global _FAST_FIND_BINARY
+    _FAST_FIND_BINARY = _detect_fast_find()
+    if _FAST_FIND_BINARY:
+        logger.info(f"Fast file discovery enabled: using {_FAST_FIND_BINARY} -printf")
+    else:
+        logger.info("Fast file discovery unavailable: falling back to os.walk")
+    return _FAST_FIND_BINARY
+
+
+_FAST_FIND_BINARY = _init_fast_find()
+
+
+def _discover_files_fast(
+    root_base: str,
+    job_id: Optional[int],
+    batch_size: int,
+    current_timestamp,
+    resolve_tracking,
+    sync_metadata_batch,
+    metrics_lock,
+    metrics,
+    db_session: Session,
+) -> Tuple[int, int]:
+    """Walk a tree using `find -printf` for fast metadata extraction.
+
+    Returns (files_found, files_batched) counts.
+ """ + total_files_found = 0 + files_batched = 0 + pending_metadata: List[Dict[str, Any]] = [] + + # -printf format: path\tsize\tmtime (tab-separated; split from right for safety) + find_binary = _FAST_FIND_BINARY # Guarded by caller check; never None here + cmd = [ + find_binary, + root_base, + "-type", + "f", + "-printf", + "%p\t%s\t%T@\n", + ] + + try: + proc = subprocess.run( + cmd, + capture_output=True, + check=False, + ) # ty: ignore[no-matching-overload] + stdout = proc.stdout + if proc.returncode != 0: + logger.warning( + f"Fast find exited with code {proc.returncode} for {root_base}" + ) + return 0, 0 + except Exception as e: + logger.error(f"Fast file discovery failed for {root_base}: {e}") + return 0, 0 + + # Process output line by line (tab-separated: path\tsize\tmtime) + for line in stdout.split(b"\n"): + if job_id is not None and JobManager.is_cancelled(job_id): + break + + if not line.strip(): + continue + + # Split from right: mtime and size are always numeric + parts = line.split(b"\t") + if len(parts) < 3: + continue + + # First n-2 parts may be path components (tabs in filename are rare) + full_file_path = b"\t".join(parts[:-2]).decode("utf-8", errors="replace") + try: + file_size = int(parts[-2]) + file_mtime = float(parts[-1]) + except (ValueError, IndexError): + continue + + total_files_found += 1 + with metrics_lock: + metrics["total_files_found"] = total_files_found + metrics["current_path"] = os.path.dirname(full_file_path) + + is_ignored = resolve_tracking(full_file_path) + pending_metadata.append( + { + "path": full_file_path, + "size": file_size, + "mtime": file_mtime, + "ignored": is_ignored, + } + ) + + if len(pending_metadata) >= batch_size: + sync_metadata_batch(db_session, pending_metadata, current_timestamp) + db_session.commit() + files_batched += len(pending_metadata) + pending_metadata = [] + if job_id is not None: + JobManager.update_job( + job_id, + 10.0, + f"Discovered {total_files_found} items...", + ) + + # Flush remaining batch + if pending_metadata: + sync_metadata_batch(db_session, pending_metadata, current_timestamp) + db_session.commit() + files_batched += len(pending_metadata) + + return total_files_found, files_batched + class JobManager: """Manages operational job states and persistence with high resilience for background threads.""" @@ -162,12 +307,12 @@ class ScannerService: if level == "background": if hasattr(p, "ionice"): p.ionice( - psutil.IOPRIO_CLASS_IDLE - ) # ty:ignore[unresolved-attribute] + psutil.IOPRIO_CLASS_IDLE # ty: ignore[unresolved-attribute] + ) p.nice(19) else: if hasattr(p, "ionice"): - p.ionice(psutil.IOPRIO_CLASS_BE) # ty:ignore[unresolved-attribute] + p.ionice(psutil.IOPRIO_CLASS_BE) # ty: ignore[unresolved-attribute] p.nice(0) except Exception: pass @@ -266,42 +411,63 @@ class ScannerService: if not os.path.exists(root_base): continue - for current_dir, sub_dirs, file_names in os.walk(root_base): - if job_id is not None and JobManager.is_cancelled(job_id): - break + if _FAST_FIND_BINARY: + # Fast path: GNU find -printf (metadata extracted in C) + metrics = { + "total_files_found": 0, + "current_path": root_base, + } + found, _ = _discover_files_fast( + root_base, + job_id, + BATCH_SIZE, + current_timestamp, + resolve_tracking, + self._sync_metadata_batch, + self._metrics_lock, + metrics, + db_session, + ) + with self._metrics_lock: + self.total_files_found += found + else: + # Compatibility path: Python os.walk + os.stat + for current_dir, _sub_dirs, file_names in os.walk(root_base): + if job_id is not None and 
JobManager.is_cancelled(job_id): + break - for name in file_names: - full_file_path = os.path.join(current_dir, name) - with self._metrics_lock: - self.total_files_found += 1 - self.current_path = current_dir + for name in file_names: + full_file_path = os.path.join(current_dir, name) + with self._metrics_lock: + self.total_files_found += 1 + self.current_path = current_dir - try: - file_stats = os.stat(full_file_path) - is_ignored = resolve_tracking(full_file_path) - pending_metadata.append( - { - "path": full_file_path, - "size": file_stats.st_size, - "mtime": file_stats.st_mtime, - "ignored": is_ignored, - } - ) - except (OSError, FileNotFoundError): - continue - - if len(pending_metadata) >= BATCH_SIZE: - self._sync_metadata_batch( - db_session, pending_metadata, current_timestamp - ) - db_session.commit() - pending_metadata = [] - if job_id is not None: - JobManager.update_job( - job_id, - 10.0, - f"Discovered {self.total_files_found} items...", + try: + file_stats = os.stat(full_file_path) + is_ignored = resolve_tracking(full_file_path) + pending_metadata.append( + { + "path": full_file_path, + "size": file_stats.st_size, + "mtime": file_stats.st_mtime, + "ignored": is_ignored, + } ) + except (OSError, FileNotFoundError): + continue + + if len(pending_metadata) >= BATCH_SIZE: + self._sync_metadata_batch( + db_session, pending_metadata, current_timestamp + ) + db_session.commit() + pending_metadata = [] + if job_id is not None: + JobManager.update_job( + job_id, + 10.0, + f"Discovered {self.total_files_found} items...", + ) if pending_metadata: self._sync_metadata_batch( diff --git a/backend/tests/test_service_scanner.py b/backend/tests/test_service_scanner.py index cd9606f..149cf09 100644 --- a/backend/tests/test_service_scanner.py +++ b/backend/tests/test_service_scanner.py @@ -110,6 +110,9 @@ def test_scan_sources_mocked(db_session, mocker): """Tests the discovery scan with mocked filesystem.""" scanner = ScannerService() + # Disable fast find so the test uses the os.walk fallback path + mocker.patch("app.services.scanner._FAST_FIND_BINARY", None) + # Mock settings mocker.patch("app.api.system.get_source_roots", return_value=["/mock_source"]) mocker.patch("app.api.system.get_exclusion_spec", return_value=None) diff --git a/frontend/playwright.config.ts b/frontend/playwright.config.ts index 6d513c8..a3d53c8 100644 --- a/frontend/playwright.config.ts +++ b/frontend/playwright.config.ts @@ -5,6 +5,7 @@ import { defineConfig, devices } from '@playwright/test'; */ export default defineConfig({ testDir: './tests', + timeout: 120000, /* Run tests in files in parallel */ fullyParallel: false, /* Fail the build on CI if you accidentally left test.only in the source code. 
*/ @@ -35,6 +36,12 @@ export default defineConfig({ /* Run your local dev server before starting the tests */ webServer: [ + { + command: 'cd ../backend && rm -f e2e_test.db* && DATABASE_URL="sqlite:///e2e_test.db" TAPEHOARD_TEST_MODE="true" uv run alembic upgrade head && DATABASE_URL="sqlite:///e2e_test.db" TAPEHOARD_TEST_MODE="true" uv run uvicorn app.main:app --host 0.0.0.0 --port 8001', + url: 'http://localhost:8001/health', + reuseExistingServer: !process.env.CI, + timeout: 120 * 1000, + }, { command: 'VITE_API_URL=http://localhost:8001 npm run dev', url: 'http://localhost:5173', diff --git a/frontend/src/lib/components/file-browser/FileBrowser.svelte b/frontend/src/lib/components/file-browser/FileBrowser.svelte index 77ebbcf..bf577cf 100644 --- a/frontend/src/lib/components/file-browser/FileBrowser.svelte +++ b/frontend/src/lib/components/file-browser/FileBrowser.svelte @@ -205,23 +205,28 @@ return crumbs; }); - const filteredFiles = $derived.by(() => { - // When doing backend search, the parent feeds us already-filtered results. - // We'll still do a light local filter to ensure things like name matching, - // but we should match on the full path just in case. - let result = files.filter((f: FileItem) => f.path.toLowerCase().includes((searchQuery || "").toLowerCase())); + const filteredFiles = $derived.by(() => { + let result = files.filter((f: FileItem) => f.path.toLowerCase().includes((searchQuery || "").toLowerCase())); - result.sort((a: FileItem, b: FileItem) => { - const valA = sortColumn === "type" ? a.type : a[sortColumn as keyof FileItem] || 0; - const valB = sortColumn === "type" ? b.type : b[sortColumn as keyof FileItem] || 0; + // Deduplicate by path to prevent keyed each block errors + const seen = new Set(); + result = result.filter((f: FileItem) => { + if (seen.has(f.path)) return false; + seen.add(f.path); + return true; + }); - if (valA < (valB as any)) return sortDirection === "asc" ? -1 : 1; - if (valA > (valB as any)) return sortDirection === "asc" ? 1 : -1; - return 0; - }); + result.sort((a: FileItem, b: FileItem) => { + const valA = sortColumn === "type" ? a.type : a[sortColumn as keyof FileItem] || 0; + const valB = sortColumn === "type" ? b.type : b[sortColumn as keyof FileItem] || 0; - return result; - }); + if (valA < (valB as any)) return sortDirection === "asc" ? -1 : 1; + if (valA > (valB as any)) return sortDirection === "asc" ? 
1 : -1; + return 0; + }); + + return result; + }); function toggleSort(col: typeof sortColumn) { if (sortColumn === col) { diff --git a/frontend/tests/e2e.test.ts b/frontend/tests/e2e.test.ts index 581df61..d5f3297 100644 --- a/frontend/tests/e2e.test.ts +++ b/frontend/tests/e2e.test.ts @@ -1,32 +1,166 @@ import { test, expect } from '@playwright/test'; +import fs from 'fs'; +import path from 'path'; + +const SOURCE_ROOT = '/tmp/tapehoard_e2e_source'; +const MOCK_LTO_PATH = '/tmp/tapehoard_e2e_mock_lto'; +const RESTORE_DEST = '/tmp/tapehoard_e2e_restore'; +const API_URL = 'http://localhost:8001'; test.describe('TapeHoard Golden Path', () => { - test('homepage loads and shows basic navigation', async ({ page }) => { - await page.goto('/'); - - // Validate the page title or basic UI elements exist - // This assumes there's some header or title indicating TapeHoard - await expect(page).toHaveTitle(/TapeHoard|Svelte/i); - - // Check if navigation links are visible - // We expect links to Backup Manager, Media Inventory, Archive Index etc based on E2E.md - const nav = page.locator('nav'); - if (await nav.count() > 0) { - await expect(page.locator('text=Inventory').first()).toBeVisible(); - await expect(page.locator('text=Archive').first()).toBeVisible(); + test.beforeAll(async ({ playwright }) => { + // 0. Reset Backend Environment + const requestContext = await playwright.request.newContext(); + const resetResponse = await requestContext.post(`${API_URL}/system/test/reset`); + if (!resetResponse.ok()) { + console.error('Failed to reset test environment'); } + + // 1. Create source data + if (fs.existsSync(SOURCE_ROOT)) { + fs.rmSync(SOURCE_ROOT, { recursive: true }); + } + fs.mkdirSync(SOURCE_ROOT, { recursive: true }); + fs.writeFileSync(path.join(SOURCE_ROOT, 'test_file_1.txt'), 'Hello world 1'); + fs.mkdirSync(path.join(SOURCE_ROOT, 'subfolder')); + fs.writeFileSync(path.join(SOURCE_ROOT, 'subfolder', 'test_file_2.txt'), 'Hello world 2'); + + // Create mock LTO dir + if (fs.existsSync(MOCK_LTO_PATH)) { + fs.rmSync(MOCK_LTO_PATH, { recursive: true }); + } + fs.mkdirSync(MOCK_LTO_PATH, { recursive: true }); + + // Ensure restore destination exists + if (fs.existsSync(RESTORE_DEST)) { + fs.rmSync(RESTORE_DEST, { recursive: true }); + } + fs.mkdirSync(RESTORE_DEST, { recursive: true }); + + // Configure backend via API + await requestContext.post(`${API_URL}/system/settings`, { + data: { key: 'source_roots', value: JSON.stringify([SOURCE_ROOT]) } + }); + await requestContext.post(`${API_URL}/system/settings`, { + data: { key: 'restore_destinations', value: JSON.stringify([RESTORE_DEST]) } + }); + await requestContext.dispose(); }); - test('media inventory shows mock provider when in test mode', async ({ page }) => { - await page.goto('/inventory'); + test('full ingestion, archival, and recovery workflow', async ({ page }) => { + page.on('console', msg => console.log('BROWSER CONSOLE:', msg.text())); + page.on('pageerror', err => console.log('BROWSER ERROR:', err.message)); - // Wait for the page to be fully loaded and hydrated + console.log('Step 1: Discovery & Tracking'); + await page.goto('/filesystem'); await page.waitForLoadState('networkidle'); - // Click the Register media button to open the dialog + await expect(page.getByText(SOURCE_ROOT).first()).toBeVisible(); + await page.getByText(SOURCE_ROOT).first().click(); + await page.waitForLoadState('networkidle'); + await expect(page.getByText('test_file_1.txt')).toBeVisible(); + await expect(page.getByText('subfolder')).toBeVisible(); + + 
const fileRow1 = page.locator('div[role="button"]', { hasText: 'test_file_1.txt' }); + await fileRow1.locator('div').first().click(); + + await page.getByRole('button', { name: /Commit rules/i }).click(); + await expect(page.getByText(/Changes committed/i)).toBeVisible(); + + console.log('Step 2: Indexing'); + await page.goto('/'); + await page.waitForLoadState('networkidle'); + await page.getByRole('button', { name: /Start scan/i }).click(); + await expect(page.getByText(/Scan job initiated/i)).toBeVisible(); + + await expect(async () => { + await page.getByRole('button', { name: /Refresh/i }).click(); + const monitoredCountText = await page.locator('h4').first().textContent(); + const monitoredCount = parseInt(monitoredCountText?.replace(/,/g, '') || '0'); + console.log(`Current monitored count: ${monitoredCount}`); + expect(monitoredCount).toBeGreaterThan(0); + }).toPass({ timeout: 20000 }); + + console.log('Step 3: Media Registration'); + await page.goto('/inventory'); + await page.waitForLoadState('networkidle'); + console.log('Clicking Register media button'); await page.getByRole('button', { name: /Register media/i }).click(); - // Check for the Mock provider text inside the dialog + console.log('Waiting for Mock LTO Tape text'); await expect(page.getByText('Mock LTO Tape (Test)')).toBeVisible({ timeout: 10000 }); + await page.getByText('Mock LTO Tape (Test)').click(); + + console.log('Filling form'); + await page.getByLabel('Identifier (Barcode/SN)').fill('TAPE001'); + await page.getByLabel('Capacity (GB)').fill('100'); + await page.getByLabel('Mock Directory Path').fill(MOCK_LTO_PATH); + + await page.getByRole('button', { name: 'Register media' }).last().click(); + await expect(page.getByText(/TAPE001 registered/i)).toBeVisible(); + + console.log('Step 4: Initialization'); + page.on('dialog', dialog => { + console.log('Dialog opened: ', dialog.message()); + dialog.accept(); + }); + await page.getByRole('button', { name: /Initialize/i }).click(); + await expect(page.getByText(/initialized successfully/i)).toBeVisible({ timeout: 10000 }); + + console.log('Step 5: Archival'); + await expect(page.getByText('TAPE001', { exact: true })).toBeVisible(); + await page.getByRole('button', { name: /Auto archive/i }).click(); + await expect(page.getByText(/Archival job initiated/i)).toBeVisible(); + + console.log('Step 6: Waiting for archival job'); + await page.goto('/jobs'); + const backupJob = page.locator('div', { hasText: /BACKUP/i }).filter({ hasText: /JOB #/ }).first(); + await expect(backupJob.getByText('COMPLETED', { exact: true }).first()).toBeVisible({ timeout: 60000 }); + + console.log('Step 7: Verify Protection'); + await page.goto('/index-browser'); + await page.waitForLoadState('networkidle'); + await page.getByText(SOURCE_ROOT).first().dblclick(); + await page.getByText('subfolder').first().dblclick(); + await expect(page.getByText('test_file_2.txt')).toBeVisible(); + await expect(page.getByText('TAPE001')).toBeVisible(); + + console.log('Step 8: Data Recovery'); + const fileRow = page.locator('div[role="button"]', { hasText: 'test_file_2.txt' }); + await fileRow.locator('button[role="checkbox"]').click(); + await expect(page.getByText(/2 items in queue/i)).toBeVisible(); + + await page.goto('/restores'); + await page.waitForLoadState('networkidle'); + await page.getByRole("treeitem").getByText('/tmp/tapehoard_e2e_source').click(); + await page.waitForLoadState('networkidle'); + await page.getByText('subfolder').dblclick(); + await 
expect(page.getByText('test_file_2.txt')).toBeVisible(); + + await page.locator('select#destination').selectOption(RESTORE_DEST); + await page.getByRole('button', { name: /Initiate recovery/i }).click(); + await expect(page.getByText(/Recovery job initiated/i)).toBeVisible(); + + console.log('Step 9: Waiting for restore job'); + await page.goto('/jobs'); + const restoreJob = page.locator('div', { hasText: /RESTORE/i }).filter({ hasText: /JOB #/ }).first(); + await expect(restoreJob.getByText('COMPLETED', { exact: true }).first()).toBeVisible({ timeout: 60000 }); + + console.log('Step 10: Verify disk'); + const restoredFilePath = path.join(RESTORE_DEST, SOURCE_ROOT, 'subfolder', 'test_file_2.txt'); + await page.waitForTimeout(2000); + + if (!fs.existsSync(restoredFilePath)) { + const fallbackPath = path.join(RESTORE_DEST, 'test_file_2.txt'); + if (fs.existsSync(fallbackPath)) { + expect(fs.readFileSync(fallbackPath, 'utf-8')).toBe('Hello world 2'); + } else { + const files = fs.readdirSync(RESTORE_DEST, { recursive: true }); + console.log('Restore DEST contents:', files); + throw new Error(`Restored file not found. Present: ${files.join(', ')}`); + } + } else { + expect(fs.readFileSync(restoredFilePath, 'utf-8')).toBe('Hello world 2'); + } }); }); diff --git a/justfile b/justfile index 44c60a5..c278e1f 100644 --- a/justfile +++ b/justfile @@ -92,13 +92,7 @@ docker-down: # --- End-to-End Testing --- -# Run the backend in test mode -e2e-server: - @echo "Starting Backend in Test Mode..." - cd backend && DATABASE_URL="sqlite:///test.db" TAPEHOARD_TEST_MODE="true" uv run alembic upgrade head - cd backend && DATABASE_URL="sqlite:///test.db" TAPEHOARD_TEST_MODE="true" uv run uvicorn app.main:app --host 0.0.0.0 --port 8001 - -# Run playwright tests -e2e: +# Run playwright tests (automatically starts mock backend) +playwright: @echo "Running Playwright E2E Tests..." cd frontend && npx playwright test
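A closing note on the scanner's fast path: the `-printf "%p\t%s\t%T@\n"` format relies on the last two fields always being numeric, which is why `_discover_files_fast` splits on tabs and then rejoins everything except the final two fields as the path. A standalone sketch of that parsing logic (it mirrors the code in the patch but is not part of it):

```python
# Synthetic `find -printf "%p\t%s\t%T@\n"` output line whose filename
# itself contains a tab; the right-anchored split still recovers it.
line = b"/data/weird\tname.bin\t2048\t1714400465.123456"

parts = line.split(b"\t")
path = b"\t".join(parts[:-2]).decode("utf-8", errors="replace")
size = int(parts[-2])
mtime = float(parts[-1])

assert path == "/data/weird\tname.bin"
assert size == 2048
assert abs(mtime - 1714400465.123456) < 1e-6
```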