From dd84532de95bc6eb0a581621a9afb1b298f47bb8 Mon Sep 17 00:00:00 2001
From: Adam Lamers
Date: Wed, 29 Apr 2026 16:21:05 -0400
Subject: [PATCH] optionally use find binary if gnu and available

---
 .github/workflows/ci.yml              |  10 +-
 GEMINI.md                             |   6 +-
 backend/app/api/inventory.py          |  11 +-
 backend/app/api/system.py             |  35 +++
 backend/app/providers/base.py         |   2 +-
 backend/app/providers/cloud.py        |   2 +-
 backend/app/providers/mock.py         |   2 +-
 backend/app/services/archiver.py      |  97 +++----
 backend/app/services/scanner.py       | 240 +++++++++++++++---
 backend/tests/test_service_scanner.py |   3 +
 frontend/playwright.config.ts         |   7 +
 .../file-browser/FileBrowser.svelte   |  33 ++-
 frontend/tests/e2e.test.ts            | 170 +++++++++++--
 justfile                              |  10 +-
 14 files changed, 493 insertions(+), 135 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index ebcb4f7..a3d2596 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -87,21 +87,13 @@ jobs:
           npm ci
           npx playwright install --with-deps chromium
 
-      - name: Start Backend Test Server
-        run: |
-          cd backend
-          uv sync --dev
-          DATABASE_URL="sqlite:///test.db" TAPEHOARD_TEST_MODE="true" uv run alembic upgrade head
-          DATABASE_URL="sqlite:///test.db" TAPEHOARD_TEST_MODE="true" uv run uvicorn app.main:app --host 0.0.0.0 --port 8001 &
-          sleep 5 # Wait for server to start
-
       - name: Run Playwright Tests
         run: |
           cd frontend
           npx playwright test
 
       - name: Upload Playwright Report
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: playwright-report
diff --git a/GEMINI.md b/GEMINI.md
index e526c95..d6f2eba 100644
--- a/GEMINI.md
+++ b/GEMINI.md
@@ -76,8 +76,10 @@ This document (`GEMINI.md`) contains critical, contextual information about the
 ### Deployment & Testing
 * **Temporal Standard:** Backend uses **UTC**. Frontend uses `parseUTCDate` to convert to browser **Local Time**.
 * **Unsaved Changes Guard:** UI must use `beforeNavigate` and `beforeunload` listeners to warn users if they leave the Settings or Media registration forms with uncommitted changes.
-* **Testing Protocol:** Use **Alembic-driven file-based SQLite** for tests to ensure 100% schema fidelity (including FTS5 and triggers) and reliable cross-thread data visibility. Atomic truncation must occur between tests.
-* `just pytest` to run backend tests
+* **Backend Testing:** Use **Alembic-driven file-based SQLite** for tests to ensure 100% schema fidelity (including FTS5 and triggers) and reliable cross-thread data visibility. Atomic truncation must occur between tests. Run `just pytest` to execute backend tests.
+* **End-to-End (E2E) Testing:** Playwright is used for E2E testing (`frontend/tests/`).
+  * **Mock Hardware:** To simulate LTO drives in CI, the backend supports a `TAPEHOARD_TEST_MODE=true` flag. This registers a `MockLTOProvider` that uses local directories instead of physical SCSI devices.
+  * **Running E2E:** Run `just playwright` to execute the Playwright test suite; its `webServer` config automatically starts the mock backend (port 8001) and the Vite dev server.
 
 ### UI & UX Philosophy
 * **Direct Terminology:** Use technical terms like "Backup Manager", "System Status", "Archive Index". Avoid marketing fluff.
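The two API hunks that follow (inventory.py and system.py) apply an identical de-duplication pass before returning results. As a standalone sketch of that pattern (the `dedupe_by_path` helper name is hypothetical; the patch inlines the loop in each endpoint):

```python
from typing import Any, Dict, List


def dedupe_by_path(results: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Keep only the first occurrence of each path, preserving order.

    Svelte keyed {#each} blocks require unique keys, so duplicate paths
    returned by the API would crash the frontend file browser.
    """
    seen: set[str] = set()
    deduped: List[Dict[str, Any]] = []
    for item in results:
        if item["path"] not in seen:
            seen.add(item["path"])
            deduped.append(item)
    return deduped
```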
diff --git a/backend/app/api/inventory.py b/backend/app/api/inventory.py
index b29717d..c2370e7 100644
--- a/backend/app/api/inventory.py
+++ b/backend/app/api/inventory.py
@@ -68,7 +68,7 @@ def list_storage_providers():
     if os.environ.get("TAPEHOARD_TEST_MODE") == "true":
         from app.providers.mock import MockLTOProvider
 
-        providers.append(MockLTOProvider)
+        providers.append(MockLTOProvider)  # ty: ignore[invalid-argument-type]
 
     return [
         StorageProviderSchema(
@@ -756,6 +756,15 @@ def browse_archive_index(path: str = "ROOT", db_session: Session = Depends(get_d
         }
     )
 
+    # Deduplicate by path to prevent frontend keyed each block errors
+    seen_paths: set[str] = set()
+    deduped_results: list[dict] = []
+    for r in results:
+        if r["path"] not in seen_paths:
+            seen_paths.add(r["path"])
+            deduped_results.append(r)
+    results = deduped_results
+
     return results
 
 
diff --git a/backend/app/api/system.py b/backend/app/api/system.py
index 8043764..eccd989 100644
--- a/backend/app/api/system.py
+++ b/backend/app/api/system.py
@@ -160,6 +160,32 @@ def get_ignored_status(
 
 # --- Endpoints ---
 
+@router.post("/test/reset")
+def reset_test_environment(db_session: Session = Depends(get_db)):
+    """Wipes the database and resets state for E2E testing."""
+    import os
+
+    if os.environ.get("TAPEHOARD_TEST_MODE") != "true":
+        raise HTTPException(status_code=403, detail="Reset only allowed in test mode")
+
+    # Wipe tables
+    db_session.query(models.FileVersion).delete()
+    db_session.query(models.RestoreCart).delete()
+    db_session.query(models.Job).delete()
+    db_session.query(models.TrackedSource).delete()
+    db_session.query(models.FilesystemState).delete()
+    db_session.query(models.StorageMedia).delete()
+    # SystemSettings are wiped as well so each E2E run starts from defaults
+    db_session.query(models.SystemSetting).delete()
+
+    db_session.commit()
+
+    # Mock hardware dirs are intentionally left untouched here;
+    # the E2E suite re-creates them in its beforeAll hook
+
+    return {"message": "Test environment reset"}
+
+
 @router.get("/dashboard/stats", response_model=DashboardStatsSchema)
 def get_dashboard_stats(db_session: Session = Depends(get_db)):
     """Computes high-level system statistics for the overview dashboard."""
@@ -439,6 +465,15 @@ def browse_system_path(
     except PermissionError:
         raise HTTPException(status_code=403, detail="Permission denied")
 
+    # Deduplicate by path to prevent frontend keyed each block errors
+    seen_paths: set[str] = set()
+    deduped_results: list[FileItemSchema] = []
+    for r in results:
+        if r.path not in seen_paths:
+            seen_paths.add(r.path)
+            deduped_results.append(r)
+    results = deduped_results
+
     results.sort(key=lambda x: (x.type != "directory", x.name.lower()))
     return results
 
diff --git a/backend/app/providers/base.py b/backend/app/providers/base.py
index ee008e1..e7f906b 100644
--- a/backend/app/providers/base.py
+++ b/backend/app/providers/base.py
@@ -47,7 +47,7 @@ class AbstractStorageProvider(ABC):
         return {"status": "HEALTHY", "alerts": []}
 
     @abstractmethod
-    def identify_media(self) -> Optional[str]:
+    def identify_media(self, allow_intrusive=True) -> Optional[str]:
         """
         Attempts to read the identifier (barcode/UUID) from the currently inserted media.
         Returns None if no media is inserted or it's unidentifiable.
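The abstract `identify_media` signature above, and the cloud and mock overrides below, all gain the same `allow_intrusive=True` keyword. No caller appears in this patch, so the intended semantics are an assumption; a minimal sketch of how a concrete provider might honor the flag (`TapeProvider`, its cache, and `_read_mam_barcode` are all hypothetical):

```python
from typing import Optional


class TapeProvider:
    """Hypothetical provider illustrating an assumed use of allow_intrusive."""

    def __init__(self) -> None:
        self._cached_id: Optional[str] = None

    def identify_media(self, allow_intrusive: bool = True) -> Optional[str]:
        # Cheap path: reuse the identifier from a previous successful read.
        if self._cached_id is not None:
            return self._cached_id
        if not allow_intrusive:
            # Assumed semantics: a passive status poll must not seek the
            # tape or read the MAM chip, so report "unknown" instead.
            return None
        self._cached_id = self._read_mam_barcode()  # hypothetical SCSI read
        return self._cached_id

    def _read_mam_barcode(self) -> Optional[str]:
        raise NotImplementedError  # a real provider would issue READ ATTRIBUTE here
```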
diff --git a/backend/app/providers/cloud.py b/backend/app/providers/cloud.py
index a58038e..e207c88 100644
--- a/backend/app/providers/cloud.py
+++ b/backend/app/providers/cloud.py
@@ -128,7 +128,7 @@ class CloudStorageProvider(AbstractStorageProvider):
         except Exception:
             return False
 
-    def identify_media(self) -> Optional[str]:
+    def identify_media(self, allow_intrusive=True) -> Optional[str]:
         try:
             response = self.s3.get_object(Bucket=self.bucket_name, Key=".tapehoard_id")
             return response["Body"].read().decode("utf-8").strip()
diff --git a/backend/app/providers/mock.py b/backend/app/providers/mock.py
index b7bbac2..7c3e459 100644
--- a/backend/app/providers/mock.py
+++ b/backend/app/providers/mock.py
@@ -47,7 +47,7 @@ class MockLTOProvider(AbstractStorageProvider):
             return True
         return False
 
-    def identify_media(self) -> Optional[str]:
+    def identify_media(self, allow_intrusive=True) -> Optional[str]:
         if not os.path.exists(self.mam_path):
             return None
         try:
diff --git a/backend/app/services/archiver.py b/backend/app/services/archiver.py
index 81f74e8..79bed08 100644
--- a/backend/app/services/archiver.py
+++ b/backend/app/services/archiver.py
@@ -2,6 +2,7 @@ import json
 import os
 import shutil
 import subprocess
+import sys
 import tarfile
 import time
 import uuid
@@ -104,7 +105,9 @@ class ArchiverService:
         if os.environ.get("TAPEHOARD_TEST_MODE") == "true":
             from app.providers.mock import MockLTOProvider
 
-            provider_map[MockLTOProvider.provider_id] = MockLTOProvider
+            provider_map[MockLTOProvider.provider_id] = (
+                MockLTOProvider  # ty: ignore[invalid-assignment]
+            )
 
         provider_cls = provider_map.get(media_record.media_type)
         if not provider_cls:
@@ -447,51 +450,59 @@ class ArchiverService:
             # Sequential Media (Tape): Hybrid Tar Generation
             has_splits = any(item["is_split"] for item in remaining_to_write)
 
-            if not has_splits and shutil.which("tar"):
-                # PERFORMANCE PATH: Use system tar binary for whole files
-                # Generate a null-terminated file list to handle special characters safely
-                file_list_path = staging_full_path + ".list"
-                with open(file_list_path, "w") as f_list:
-                    for item in remaining_to_write:
-                        # Write absolute path to list
-                        f_list.write(item["file_state"].file_path + "\0")
+            if not has_splits:
+                # PERFORMANCE PATH: Use GNU tar binary for whole files
+                # Prefer gtar on Darwin (BSD tar lacks GNU flags like --absolute-names)
+                tar_binary = None
+                if sys.platform == "darwin":
+                    tar_binary = shutil.which("gtar")
+                if tar_binary is None:
+                    tar_binary = shutil.which("tar")
 
-                try:
-                    # Use -C / to handle absolute paths, --null and -T for the list
-                    # --no-recursion since we've already resolved the file list
-                    cmd = [
-                        "tar",
-                        "-cf",
-                        staging_full_path,
-                        "--null",
-                        "-T",
-                        file_list_path,
-                        "--no-recursion",
-                        "--absolute-names",
-                    ]
-                    logger.debug(f"RUNNING BINARY TAR: {' '.join(cmd)}")
-                    subprocess.run(cmd, check=True, capture_output=True)
+                if tar_binary:
+                    # Generate a null-terminated file list to handle special characters safely
+                    file_list_path = staging_full_path + ".list"
+                    with open(file_list_path, "w") as f_list:
+                        for item in remaining_to_write:
+                            # Write absolute path to list
+                            f_list.write(item["file_state"].file_path + "\0")
 
-                    # Update progress to 100% for this chunk
-                    processed_bytes += sum(
-                        i["offset_end"] - i["offset_start"]
-                        for i in remaining_to_write
-                    )
-                    JobManager.update_job(
-                        job_id,
-                        15.0 + (70.0 * (processed_bytes / safe_divisor)),
-                        f"Archived chunk {chunk_index+1} via binary tar",
-                    )
-                except Exception as e:
-                    logger.error(
-                        f"Binary tar failed, falling back to Python: {e}"
-                    )
-                    has_splits = True  # Trigger fallback
-                finally:
-                    if os.path.exists(file_list_path):
-                        os.remove(file_list_path)
+                    try:
+                        # --null must come before -T; --no-recursion and --absolute-names
+                        # must come before positional/non-option arguments
+                        cmd = [
+                            tar_binary,
+                            "-cf",
+                            staging_full_path,
+                            "--null",
+                            "--no-recursion",
+                            "--absolute-names",
+                            "-T",
+                            file_list_path,
+                        ]
+                        logger.debug(f"RUNNING BINARY TAR: {' '.join(cmd)}")
+                        subprocess.run(cmd, check=True, capture_output=True)
 
-            if has_splits or not shutil.which("tar"):
+                        # Update progress to 100% for this chunk
+                        processed_bytes += sum(
+                            i["offset_end"] - i["offset_start"]
+                            for i in remaining_to_write
+                        )
+                        JobManager.update_job(
+                            job_id,
+                            15.0 + (70.0 * (processed_bytes / safe_divisor)),
+                            f"Archived chunk {chunk_index+1} via binary tar",
+                        )
+                    except Exception as e:
+                        logger.error(
+                            f"Binary tar failed, falling back to Python: {e}"
+                        )
+                        has_splits = True  # Trigger fallback
+                    finally:
+                        if os.path.exists(file_list_path):
+                            os.remove(file_list_path)
+
+            if has_splits or not tar_binary:
                 # COMPATIBILITY PATH: Pure Python for fragments or if tar is missing
                 with tarfile.open(staging_full_path, "w") as tar_bundle:
                     for item in remaining_to_write:
diff --git a/backend/app/services/scanner.py b/backend/app/services/scanner.py
index 37a71ca..ee9d646 100644
--- a/backend/app/services/scanner.py
+++ b/backend/app/services/scanner.py
@@ -1,10 +1,12 @@
 import concurrent.futures
 import hashlib
 import os
+import shutil
+import subprocess
 import threading
 import time
 from datetime import datetime, timezone
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Tuple
 
 import psutil
 from loguru import logger
@@ -14,6 +16,149 @@ from sqlalchemy.orm.exc import StaleDataError
 from app.db import models
 from app.db.database import SessionLocal
 
+# Fast file discovery via `find -printf` (GNU find or compatible).
+# Detected once at import time; falls back to os.walk if unavailable.
+_FAST_FIND_BINARY: Optional[str] = None
+
+
+def _detect_fast_find() -> Optional[str]:
+    """Check if a `find` binary with `-printf` support is available.
+
+    Tries `gfind` (GNU find via Homebrew on macOS) first, then `find`.
+    Returns the binary path if `-printf` works, otherwise ``None``.
+    """
+    for candidate in ("gfind", "find"):
+        binary = shutil.which(candidate)
+        if binary is None:
+            continue
+        try:
+            result = subprocess.run(
+                [binary, "/tmp", "-maxdepth", "0", "-printf", "%f\n"],
+                capture_output=True,
+                timeout=5,
+            )
+            if result.returncode == 0 and result.stdout.strip() == b"tmp":
+                return binary
+        except Exception:
+            continue
+    return None
+
+
+def _init_fast_find() -> Optional[str]:
+    global _FAST_FIND_BINARY
+    _FAST_FIND_BINARY = _detect_fast_find()
+    if _FAST_FIND_BINARY:
+        logger.info(f"Fast file discovery enabled: using {_FAST_FIND_BINARY} -printf")
+    else:
+        logger.info("Fast file discovery unavailable: falling back to os.walk")
+    return _FAST_FIND_BINARY
+
+
+_FAST_FIND_BINARY = _init_fast_find()
+
+
+def _discover_files_fast(
+    root_base: str,
+    job_id: Optional[int],
+    batch_size: int,
+    current_timestamp,
+    resolve_tracking,
+    sync_metadata_batch,
+    metrics_lock,
+    metrics,
+    db_session: Session,
+) -> Tuple[int, int]:
+    """Walk a tree using `find -printf` for fast metadata extraction.
+
+    Returns (files_found, files_batched) counts.
+ """ + total_files_found = 0 + files_batched = 0 + pending_metadata: List[Dict[str, Any]] = [] + + # -printf format: path\tsize\tmtime (tab-separated; split from right for safety) + find_binary = _FAST_FIND_BINARY # Guarded by caller check; never None here + cmd = [ + find_binary, + root_base, + "-type", + "f", + "-printf", + "%p\t%s\t%T@\n", + ] + + try: + proc = subprocess.run( + cmd, + capture_output=True, + check=False, + ) # ty: ignore[no-matching-overload] + stdout = proc.stdout + if proc.returncode != 0: + logger.warning( + f"Fast find exited with code {proc.returncode} for {root_base}" + ) + return 0, 0 + except Exception as e: + logger.error(f"Fast file discovery failed for {root_base}: {e}") + return 0, 0 + + # Process output line by line (tab-separated: path\tsize\tmtime) + for line in stdout.split(b"\n"): + if job_id is not None and JobManager.is_cancelled(job_id): + break + + if not line.strip(): + continue + + # Split from right: mtime and size are always numeric + parts = line.split(b"\t") + if len(parts) < 3: + continue + + # First n-2 parts may be path components (tabs in filename are rare) + full_file_path = b"\t".join(parts[:-2]).decode("utf-8", errors="replace") + try: + file_size = int(parts[-2]) + file_mtime = float(parts[-1]) + except (ValueError, IndexError): + continue + + total_files_found += 1 + with metrics_lock: + metrics["total_files_found"] = total_files_found + metrics["current_path"] = os.path.dirname(full_file_path) + + is_ignored = resolve_tracking(full_file_path) + pending_metadata.append( + { + "path": full_file_path, + "size": file_size, + "mtime": file_mtime, + "ignored": is_ignored, + } + ) + + if len(pending_metadata) >= batch_size: + sync_metadata_batch(db_session, pending_metadata, current_timestamp) + db_session.commit() + files_batched += len(pending_metadata) + pending_metadata = [] + if job_id is not None: + JobManager.update_job( + job_id, + 10.0, + f"Discovered {total_files_found} items...", + ) + + # Flush remaining batch + if pending_metadata: + sync_metadata_batch(db_session, pending_metadata, current_timestamp) + db_session.commit() + files_batched += len(pending_metadata) + + return total_files_found, files_batched + class JobManager: """Manages operational job states and persistence with high resilience for background threads.""" @@ -162,12 +307,12 @@ class ScannerService: if level == "background": if hasattr(p, "ionice"): p.ionice( - psutil.IOPRIO_CLASS_IDLE - ) # ty:ignore[unresolved-attribute] + psutil.IOPRIO_CLASS_IDLE # ty: ignore[unresolved-attribute] + ) p.nice(19) else: if hasattr(p, "ionice"): - p.ionice(psutil.IOPRIO_CLASS_BE) # ty:ignore[unresolved-attribute] + p.ionice(psutil.IOPRIO_CLASS_BE) # ty: ignore[unresolved-attribute] p.nice(0) except Exception: pass @@ -266,42 +411,63 @@ class ScannerService: if not os.path.exists(root_base): continue - for current_dir, sub_dirs, file_names in os.walk(root_base): - if job_id is not None and JobManager.is_cancelled(job_id): - break + if _FAST_FIND_BINARY: + # Fast path: GNU find -printf (metadata extracted in C) + metrics = { + "total_files_found": 0, + "current_path": root_base, + } + found, _ = _discover_files_fast( + root_base, + job_id, + BATCH_SIZE, + current_timestamp, + resolve_tracking, + self._sync_metadata_batch, + self._metrics_lock, + metrics, + db_session, + ) + with self._metrics_lock: + self.total_files_found += found + else: + # Compatibility path: Python os.walk + os.stat + for current_dir, _sub_dirs, file_names in os.walk(root_base): + if job_id is not None and 
JobManager.is_cancelled(job_id): + break - for name in file_names: - full_file_path = os.path.join(current_dir, name) - with self._metrics_lock: - self.total_files_found += 1 - self.current_path = current_dir + for name in file_names: + full_file_path = os.path.join(current_dir, name) + with self._metrics_lock: + self.total_files_found += 1 + self.current_path = current_dir - try: - file_stats = os.stat(full_file_path) - is_ignored = resolve_tracking(full_file_path) - pending_metadata.append( - { - "path": full_file_path, - "size": file_stats.st_size, - "mtime": file_stats.st_mtime, - "ignored": is_ignored, - } - ) - except (OSError, FileNotFoundError): - continue - - if len(pending_metadata) >= BATCH_SIZE: - self._sync_metadata_batch( - db_session, pending_metadata, current_timestamp - ) - db_session.commit() - pending_metadata = [] - if job_id is not None: - JobManager.update_job( - job_id, - 10.0, - f"Discovered {self.total_files_found} items...", + try: + file_stats = os.stat(full_file_path) + is_ignored = resolve_tracking(full_file_path) + pending_metadata.append( + { + "path": full_file_path, + "size": file_stats.st_size, + "mtime": file_stats.st_mtime, + "ignored": is_ignored, + } ) + except (OSError, FileNotFoundError): + continue + + if len(pending_metadata) >= BATCH_SIZE: + self._sync_metadata_batch( + db_session, pending_metadata, current_timestamp + ) + db_session.commit() + pending_metadata = [] + if job_id is not None: + JobManager.update_job( + job_id, + 10.0, + f"Discovered {self.total_files_found} items...", + ) if pending_metadata: self._sync_metadata_batch( diff --git a/backend/tests/test_service_scanner.py b/backend/tests/test_service_scanner.py index cd9606f..149cf09 100644 --- a/backend/tests/test_service_scanner.py +++ b/backend/tests/test_service_scanner.py @@ -110,6 +110,9 @@ def test_scan_sources_mocked(db_session, mocker): """Tests the discovery scan with mocked filesystem.""" scanner = ScannerService() + # Disable fast find so the test uses the os.walk fallback path + mocker.patch("app.services.scanner._FAST_FIND_BINARY", None) + # Mock settings mocker.patch("app.api.system.get_source_roots", return_value=["/mock_source"]) mocker.patch("app.api.system.get_exclusion_spec", return_value=None) diff --git a/frontend/playwright.config.ts b/frontend/playwright.config.ts index 6d513c8..a3d53c8 100644 --- a/frontend/playwright.config.ts +++ b/frontend/playwright.config.ts @@ -5,6 +5,7 @@ import { defineConfig, devices } from '@playwright/test'; */ export default defineConfig({ testDir: './tests', + timeout: 120000, /* Run tests in files in parallel */ fullyParallel: false, /* Fail the build on CI if you accidentally left test.only in the source code. 
*/ @@ -35,6 +36,12 @@ export default defineConfig({ /* Run your local dev server before starting the tests */ webServer: [ + { + command: 'cd ../backend && rm -f e2e_test.db* && DATABASE_URL="sqlite:///e2e_test.db" TAPEHOARD_TEST_MODE="true" uv run alembic upgrade head && DATABASE_URL="sqlite:///e2e_test.db" TAPEHOARD_TEST_MODE="true" uv run uvicorn app.main:app --host 0.0.0.0 --port 8001', + url: 'http://localhost:8001/health', + reuseExistingServer: !process.env.CI, + timeout: 120 * 1000, + }, { command: 'VITE_API_URL=http://localhost:8001 npm run dev', url: 'http://localhost:5173', diff --git a/frontend/src/lib/components/file-browser/FileBrowser.svelte b/frontend/src/lib/components/file-browser/FileBrowser.svelte index 77ebbcf..bf577cf 100644 --- a/frontend/src/lib/components/file-browser/FileBrowser.svelte +++ b/frontend/src/lib/components/file-browser/FileBrowser.svelte @@ -205,23 +205,28 @@ return crumbs; }); - const filteredFiles = $derived.by(() => { - // When doing backend search, the parent feeds us already-filtered results. - // We'll still do a light local filter to ensure things like name matching, - // but we should match on the full path just in case. - let result = files.filter((f: FileItem) => f.path.toLowerCase().includes((searchQuery || "").toLowerCase())); + const filteredFiles = $derived.by(() => { + let result = files.filter((f: FileItem) => f.path.toLowerCase().includes((searchQuery || "").toLowerCase())); - result.sort((a: FileItem, b: FileItem) => { - const valA = sortColumn === "type" ? a.type : a[sortColumn as keyof FileItem] || 0; - const valB = sortColumn === "type" ? b.type : b[sortColumn as keyof FileItem] || 0; + // Deduplicate by path to prevent keyed each block errors + const seen = new Set(); + result = result.filter((f: FileItem) => { + if (seen.has(f.path)) return false; + seen.add(f.path); + return true; + }); - if (valA < (valB as any)) return sortDirection === "asc" ? -1 : 1; - if (valA > (valB as any)) return sortDirection === "asc" ? 1 : -1; - return 0; - }); + result.sort((a: FileItem, b: FileItem) => { + const valA = sortColumn === "type" ? a.type : a[sortColumn as keyof FileItem] || 0; + const valB = sortColumn === "type" ? b.type : b[sortColumn as keyof FileItem] || 0; - return result; - }); + if (valA < (valB as any)) return sortDirection === "asc" ? -1 : 1; + if (valA > (valB as any)) return sortDirection === "asc" ? 
1 : -1; + return 0; + }); + + return result; + }); function toggleSort(col: typeof sortColumn) { if (sortColumn === col) { diff --git a/frontend/tests/e2e.test.ts b/frontend/tests/e2e.test.ts index 581df61..d5f3297 100644 --- a/frontend/tests/e2e.test.ts +++ b/frontend/tests/e2e.test.ts @@ -1,32 +1,166 @@ import { test, expect } from '@playwright/test'; +import fs from 'fs'; +import path from 'path'; + +const SOURCE_ROOT = '/tmp/tapehoard_e2e_source'; +const MOCK_LTO_PATH = '/tmp/tapehoard_e2e_mock_lto'; +const RESTORE_DEST = '/tmp/tapehoard_e2e_restore'; +const API_URL = 'http://localhost:8001'; test.describe('TapeHoard Golden Path', () => { - test('homepage loads and shows basic navigation', async ({ page }) => { - await page.goto('/'); - - // Validate the page title or basic UI elements exist - // This assumes there's some header or title indicating TapeHoard - await expect(page).toHaveTitle(/TapeHoard|Svelte/i); - - // Check if navigation links are visible - // We expect links to Backup Manager, Media Inventory, Archive Index etc based on E2E.md - const nav = page.locator('nav'); - if (await nav.count() > 0) { - await expect(page.locator('text=Inventory').first()).toBeVisible(); - await expect(page.locator('text=Archive').first()).toBeVisible(); + test.beforeAll(async ({ playwright }) => { + // 0. Reset Backend Environment + const requestContext = await playwright.request.newContext(); + const resetResponse = await requestContext.post(`${API_URL}/system/test/reset`); + if (!resetResponse.ok()) { + console.error('Failed to reset test environment'); } + + // 1. Create source data + if (fs.existsSync(SOURCE_ROOT)) { + fs.rmSync(SOURCE_ROOT, { recursive: true }); + } + fs.mkdirSync(SOURCE_ROOT, { recursive: true }); + fs.writeFileSync(path.join(SOURCE_ROOT, 'test_file_1.txt'), 'Hello world 1'); + fs.mkdirSync(path.join(SOURCE_ROOT, 'subfolder')); + fs.writeFileSync(path.join(SOURCE_ROOT, 'subfolder', 'test_file_2.txt'), 'Hello world 2'); + + // Create mock LTO dir + if (fs.existsSync(MOCK_LTO_PATH)) { + fs.rmSync(MOCK_LTO_PATH, { recursive: true }); + } + fs.mkdirSync(MOCK_LTO_PATH, { recursive: true }); + + // Ensure restore destination exists + if (fs.existsSync(RESTORE_DEST)) { + fs.rmSync(RESTORE_DEST, { recursive: true }); + } + fs.mkdirSync(RESTORE_DEST, { recursive: true }); + + // Configure backend via API + await requestContext.post(`${API_URL}/system/settings`, { + data: { key: 'source_roots', value: JSON.stringify([SOURCE_ROOT]) } + }); + await requestContext.post(`${API_URL}/system/settings`, { + data: { key: 'restore_destinations', value: JSON.stringify([RESTORE_DEST]) } + }); + await requestContext.dispose(); }); - test('media inventory shows mock provider when in test mode', async ({ page }) => { - await page.goto('/inventory'); + test('full ingestion, archival, and recovery workflow', async ({ page }) => { + page.on('console', msg => console.log('BROWSER CONSOLE:', msg.text())); + page.on('pageerror', err => console.log('BROWSER ERROR:', err.message)); - // Wait for the page to be fully loaded and hydrated + console.log('Step 1: Discovery & Tracking'); + await page.goto('/filesystem'); await page.waitForLoadState('networkidle'); - // Click the Register media button to open the dialog + await expect(page.getByText(SOURCE_ROOT).first()).toBeVisible(); + await page.getByText(SOURCE_ROOT).first().click(); + await page.waitForLoadState('networkidle'); + await expect(page.getByText('test_file_1.txt')).toBeVisible(); + await expect(page.getByText('subfolder')).toBeVisible(); + + 
const fileRow1 = page.locator('div[role="button"]', { hasText: 'test_file_1.txt' }); + await fileRow1.locator('div').first().click(); + + await page.getByRole('button', { name: /Commit rules/i }).click(); + await expect(page.getByText(/Changes committed/i)).toBeVisible(); + + console.log('Step 2: Indexing'); + await page.goto('/'); + await page.waitForLoadState('networkidle'); + await page.getByRole('button', { name: /Start scan/i }).click(); + await expect(page.getByText(/Scan job initiated/i)).toBeVisible(); + + await expect(async () => { + await page.getByRole('button', { name: /Refresh/i }).click(); + const monitoredCountText = await page.locator('h4').first().textContent(); + const monitoredCount = parseInt(monitoredCountText?.replace(/,/g, '') || '0'); + console.log(`Current monitored count: ${monitoredCount}`); + expect(monitoredCount).toBeGreaterThan(0); + }).toPass({ timeout: 20000 }); + + console.log('Step 3: Media Registration'); + await page.goto('/inventory'); + await page.waitForLoadState('networkidle'); + console.log('Clicking Register media button'); await page.getByRole('button', { name: /Register media/i }).click(); - // Check for the Mock provider text inside the dialog + console.log('Waiting for Mock LTO Tape text'); await expect(page.getByText('Mock LTO Tape (Test)')).toBeVisible({ timeout: 10000 }); + await page.getByText('Mock LTO Tape (Test)').click(); + + console.log('Filling form'); + await page.getByLabel('Identifier (Barcode/SN)').fill('TAPE001'); + await page.getByLabel('Capacity (GB)').fill('100'); + await page.getByLabel('Mock Directory Path').fill(MOCK_LTO_PATH); + + await page.getByRole('button', { name: 'Register media' }).last().click(); + await expect(page.getByText(/TAPE001 registered/i)).toBeVisible(); + + console.log('Step 4: Initialization'); + page.on('dialog', dialog => { + console.log('Dialog opened: ', dialog.message()); + dialog.accept(); + }); + await page.getByRole('button', { name: /Initialize/i }).click(); + await expect(page.getByText(/initialized successfully/i)).toBeVisible({ timeout: 10000 }); + + console.log('Step 5: Archival'); + await expect(page.getByText('TAPE001', { exact: true })).toBeVisible(); + await page.getByRole('button', { name: /Auto archive/i }).click(); + await expect(page.getByText(/Archival job initiated/i)).toBeVisible(); + + console.log('Step 6: Waiting for archival job'); + await page.goto('/jobs'); + const backupJob = page.locator('div', { hasText: /BACKUP/i }).filter({ hasText: /JOB #/ }).first(); + await expect(backupJob.getByText('COMPLETED', { exact: true }).first()).toBeVisible({ timeout: 60000 }); + + console.log('Step 7: Verify Protection'); + await page.goto('/index-browser'); + await page.waitForLoadState('networkidle'); + await page.getByText(SOURCE_ROOT).first().dblclick(); + await page.getByText('subfolder').first().dblclick(); + await expect(page.getByText('test_file_2.txt')).toBeVisible(); + await expect(page.getByText('TAPE001')).toBeVisible(); + + console.log('Step 8: Data Recovery'); + const fileRow = page.locator('div[role="button"]', { hasText: 'test_file_2.txt' }); + await fileRow.locator('button[role="checkbox"]').click(); + await expect(page.getByText(/2 items in queue/i)).toBeVisible(); + + await page.goto('/restores'); + await page.waitForLoadState('networkidle'); + await page.getByRole("treeitem").getByText('/tmp/tapehoard_e2e_source').click(); + await page.waitForLoadState('networkidle'); + await page.getByText('subfolder').dblclick(); + await 
expect(page.getByText('test_file_2.txt')).toBeVisible(); + + await page.locator('select#destination').selectOption(RESTORE_DEST); + await page.getByRole('button', { name: /Initiate recovery/i }).click(); + await expect(page.getByText(/Recovery job initiated/i)).toBeVisible(); + + console.log('Step 9: Waiting for restore job'); + await page.goto('/jobs'); + const restoreJob = page.locator('div', { hasText: /RESTORE/i }).filter({ hasText: /JOB #/ }).first(); + await expect(restoreJob.getByText('COMPLETED', { exact: true }).first()).toBeVisible({ timeout: 60000 }); + + console.log('Step 10: Verify disk'); + const restoredFilePath = path.join(RESTORE_DEST, SOURCE_ROOT, 'subfolder', 'test_file_2.txt'); + await page.waitForTimeout(2000); + + if (!fs.existsSync(restoredFilePath)) { + const fallbackPath = path.join(RESTORE_DEST, 'test_file_2.txt'); + if (fs.existsSync(fallbackPath)) { + expect(fs.readFileSync(fallbackPath, 'utf-8')).toBe('Hello world 2'); + } else { + const files = fs.readdirSync(RESTORE_DEST, { recursive: true }); + console.log('Restore DEST contents:', files); + throw new Error(`Restored file not found. Present: ${files.join(', ')}`); + } + } else { + expect(fs.readFileSync(restoredFilePath, 'utf-8')).toBe('Hello world 2'); + } }); }); diff --git a/justfile b/justfile index 44c60a5..c278e1f 100644 --- a/justfile +++ b/justfile @@ -92,13 +92,7 @@ docker-down: # --- End-to-End Testing --- -# Run the backend in test mode -e2e-server: - @echo "Starting Backend in Test Mode..." - cd backend && DATABASE_URL="sqlite:///test.db" TAPEHOARD_TEST_MODE="true" uv run alembic upgrade head - cd backend && DATABASE_URL="sqlite:///test.db" TAPEHOARD_TEST_MODE="true" uv run uvicorn app.main:app --host 0.0.0.0 --port 8001 - -# Run playwright tests -e2e: +# Run playwright tests (automatically starts mock backend) +playwright: @echo "Running Playwright E2E Tests..." cd frontend && npx playwright test
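A closing note on the scanner's fast path: the `-printf "%p\t%s\t%T@\n"` format relies on the last two fields always being numeric, which is why `_discover_files_fast` splits on tabs and then rejoins everything except the final two fields as the path. A standalone sketch of that parsing logic (it mirrors the code in the patch but is not part of it):

```python
# Synthetic `find -printf "%p\t%s\t%T@\n"` output line whose filename
# itself contains a tab; the right-anchored split still recovers it.
line = b"/data/weird\tname.bin\t2048\t1714400465.123456"

parts = line.split(b"\t")
path = b"\t".join(parts[:-2]).decode("utf-8", errors="replace")
size = int(parts[-2])
mtime = float(parts[-1])

assert path == "/data/weird\tname.bin"
assert size == 2048
assert abs(mtime - 1714400465.123456) < 1e-6
```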