filesystem view uses index only

don't reuse playwright server
2026-05-04 20:44:49 -04:00 · 2026-05-04 20:19:04 -04:00
4 changed files with 98 additions and 95 deletions
@@ -50,6 +50,15 @@ All API routes live under `app/api/`. The `system` endpoints are split into a pa
 Each module defines its own `APIRouter` with `tags=["System"]` and is registered in `main.py` with `prefix="/system"`.
 ### Index-Only Principle
 **Never rely on the live filesystem for data, except during a scan or while configuring source roots.** All endpoints that read archive data must operate exclusively on the database index. The live filesystem is accessed only during:
 - **Scan operations** (`/system/scan`) — to discover files, compute hashes, and sync the index.
 - **Configuration endpoints** (`/system/ls`, `/system/browse` when path is outside roots) — to help users pick source roots during setup.
 Browsing the archive, searching, or checking protection status must use the index only. This guarantees consistent results even when files are temporarily inaccessible, and prevents I/O bottlenecks on network or tape-backed storage.
 ### Shared Helpers (`app/api/common.py`)
 Cross-cutting helpers and schemas that must not create circular imports:
@@ -13,7 +13,6 @@ from app.api.common import (
 )
 from sqlalchemy import text
 from app.db import models
 import os
 router = APIRouter(tags=["System"])
@@ -24,7 +23,11 @@ router = APIRouter(tags=["System"])
 def browse_system_path(
    path: Optional[str] = None, db_session: Session = Depends(get_db)
 ):
-    """Provides a browsable view of the indexed filesystem from the database."""
+    """Provides a browsable view of the indexed filesystem from the database.
    Operates exclusively on the database index (index-only principle).
    Never falls back to the live filesystem.
    """
    roots = get_source_roots(db_session)
    tracking_rules = db_session.query(models.TrackedSource).all()
    tracking_map = {rule.path: rule.action for rule in tracking_rules}
@@ -57,105 +60,79 @@ def browse_system_path(
        target_prefix.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
    )
-    files_sql = text("""
+    # --- Files directly under this path (non-recursive) ---
    file_sql = text("""
        SELECT file_path, size, mtime, sha256_hash, is_ignored
        FROM filesystem_state
-        WHERE file_path LIKE :prefix ESCAPE '\\'
+        WHERE file_path LIKE :prefix_wildcard ESCAPE '\\'
-        AND file_path != :prefix
+          AND file_path NOT LIKE :prefix_nested ESCAPE '\\'
          AND file_path != :prefix
    """)
-    rows = db_session.execute(files_sql, {"prefix": f"{escaped_prefix}%"}).fetchall()
+    file_rows = db_session.execute(
        file_sql,
        {
            "prefix": target_prefix,
            "prefix_wildcard": f"{escaped_prefix}%",
            "prefix_nested": f"{escaped_prefix}%/%",
        },
    ).fetchall()
-    if not rows and os.path.isdir(path):
+    results: list[FileItemSchema] = []
-        try:
+    seen: set[str] = set()
-            live_results = []
+
-            with os.scandir(path) as it:
+    for file_path, size, mtime, sha256_hash, is_ignored in file_rows:
-                for entry in it:
+        if file_path not in seen:
-                    try:
+            seen.add(file_path)
-                        if entry.name.startswith("."):
+            results.append(
-                            continue
+                FileItemSchema(
-                        entry_path = entry.path
+                    name=file_path.split("/")[-1],
-                        is_dir = entry.is_dir()
+                    path=file_path,
-                        is_ignored = get_ignored_status(
+                    type="file",
-                            entry_path + "/" if is_dir else entry_path,
+                    size=size,
-                            tracking_map,
+                    mtime=mtime,
-                            exclusion_spec,
+                    ignored=is_ignored,
-                        )
+                    sha256_hash=sha256_hash,
-                        if is_dir:
+                )
                            live_results.append(
                                FileItemSchema(
                                    name=entry.name,
                                    path=entry_path,
                                    type="directory",
                                    ignored=is_ignored,
                                )
                            )
                        else:
                            stat = entry.stat()
                            live_results.append(
                                FileItemSchema(
                                    name=entry.name,
                                    path=entry_path,
                                    type="file",
                                    size=stat.st_size,
                                    mtime=stat.st_mtime,
                                    ignored=is_ignored,
                                    sha256_hash=None,
                                )
                            )
                    except OSError:
                        continue
            live_results.sort(key=lambda x: (x.type != "directory", x.name.lower()))
            return BrowseResponseSchema(
                files=live_results, last_scan_time=last_scan_time
            )
        except OSError:
            pass
-    # Aggregate sizes for directories from indexed rows
+    # --- Directories under this path (aggregated via GROUP BY) ---
-    dir_sizes: dict[str, int] = {}
+    dir_sql = text("""
-    for file_path, size, _mtime, _sha256_hash, _is_ignored in rows:
+        SELECT
-        relative = file_path[len(target_prefix) :]
+            SUBSTR(file_path, LENGTH(:prefix) + 1,
-        if "/" in relative:
+                   INSTR(SUBSTR(file_path, LENGTH(:prefix) + 1), '/') - 1) as dir_name,
-            immediate_name = relative.split("/")[0]
+            SUM(size) as total_size
-            child_path = target_prefix + immediate_name
+        FROM filesystem_state
-            dir_sizes[child_path] = dir_sizes.get(child_path, 0) + (size or 0)
+        WHERE file_path LIKE :prefix_wildcard ESCAPE '\\'
          AND file_path != :prefix
          AND INSTR(SUBSTR(file_path, LENGTH(:prefix) + 1), '/') > 0
        GROUP BY dir_name
    """)
    dir_rows = db_session.execute(
        dir_sql,
        {
            "prefix": target_prefix,
            "prefix_wildcard": f"{escaped_prefix}%",
        },
    ).fetchall()
-    results = []
+    for dir_name, total_size in dir_rows:
-    seen = set()
+        if not dir_name or dir_name == "/":
-
+            continue
-    for file_path, size, mtime, sha256_hash, is_ignored in rows:
+        child_path = target_prefix + dir_name
-        relative = file_path[len(target_prefix) :]
+        if child_path not in seen:
-        if "/" in relative:
+            seen.add(child_path)
-            immediate_name = relative.split("/")[0]
+            dir_ignored = get_ignored_status(
-            child_path = target_prefix + immediate_name
+                child_path + "/", tracking_map, exclusion_spec
-            if child_path not in seen:
+            )
-                seen.add(child_path)
+            results.append(
-                dir_ignored = get_ignored_status(
+                FileItemSchema(
-                    child_path + "/", tracking_map, exclusion_spec
+                    name=dir_name,
-                )
+                    path=child_path,
-                results.append(
+                    type="directory",
-                    FileItemSchema(
+                    size=total_size or 0,
-                        name=immediate_name,
+                    ignored=dir_ignored,
                        path=child_path,
                        type="directory",
                        size=dir_sizes.get(child_path, 0),
                        ignored=dir_ignored,
                    )
                )
        else:
            if file_path not in seen:
                seen.add(file_path)
                results.append(
                    FileItemSchema(
                        name=relative,
                        path=file_path,
                        type="file",
                        size=size,
                        mtime=mtime,
                        ignored=is_ignored,
                        sha256_hash=sha256_hash,
                    )
                )
            )
    results.sort(key=lambda x: (x.type != "directory", x.name.lower()))
    return BrowseResponseSchema(files=results, last_scan_time=last_scan_time)
@@ -38,13 +38,13 @@ export default defineConfig({
    {
       command: 'cd ../backend && rm -f e2e_test.db* && DATABASE_URL="sqlite:///e2e_test.db" TAPEHOARD_TEST_MODE="true" TAPEHOARD_CORS_ORIGINS="*,http://localhost:5174,http://127.0.0.1:5174" uv run python -m app.start_test_server --host 127.0.0.1 --port 8001',
      url: 'http://127.0.0.1:8001/health',
-      reuseExistingServer: !process.env.CI,
+      reuseExistingServer: false,
      timeout: 120 * 1000,
    },
    {
      command: 'VITE_API_URL=http://127.0.0.1:8001 npm run dev -- --port 5174',
      url: 'http://localhost:5174',
-      reuseExistingServer: !process.env.CI,
+      reuseExistingServer: false,
      timeout: 120 * 1000,
    },
  ],
@@ -44,6 +44,23 @@ test.describe('TapeHoard Golden Path', () => {
    await requestContext.post(`${API_URL}/system/settings`, {
        data: { key: 'restore_destinations', value: JSON.stringify([RESTORE_DEST]) }
    });
    // Index-only principle: scan first so /system/browse can show files
    const scanResp = await requestContext.post(`${API_URL}/system/scan`);
    if (!scanResp.ok()) {
        console.error('Failed to trigger initial scan');
    }
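    // Poll the scan status every 500 ms until the scan finishes or a 30 s deadline passes (a timeout is not treated as an error here)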
    const deadline = Date.now() + 30000;
    while (Date.now() < deadline) {
        const statusResp = await requestContext.get(`${API_URL}/system/scan/status`);
        const status = await statusResp.json();
        if (!status.is_running) {
            break;
        }
        await new Promise(r => setTimeout(r, 500));
    }
    await requestContext.dispose();
  });
Author	SHA1	Message	Date
adamlamers	1ff21e3c2c	filesystem view uses index only Continuous Integration / backend-tests (push) Successful in 33s Details Continuous Integration / e2e-tests (push) Successful in 12m19s Details Continuous Integration / frontend-check (push) Successful in 27s Details	2026-05-04 20:44:49 -04:00
adamlamers	544bd14cbb	don't reuse playwright server	2026-05-04 20:19:04 -04:00