Compare commits

...

2 Commits

Author SHA1 Message Date
adamlamers 1ff21e3c2c filesystem view uses index only
Continuous Integration / backend-tests (push) Successful in 33s
Continuous Integration / e2e-tests (push) Successful in 12m19s
Continuous Integration / frontend-check (push) Successful in 27s
2026-05-04 20:44:49 -04:00
adamlamers 544bd14cbb don't reuse playwright server 2026-05-04 20:19:04 -04:00
4 changed files with 98 additions and 95 deletions
+9
View File
@@ -50,6 +50,15 @@ All API routes live under `app/api/`. The `system` endpoints are split into a pa
Each module defines its own `APIRouter` with `tags=["System"]` and is registered in `main.py` with `prefix="/system"`.
### Index-Only Principle
**Never rely on the live filesystem for data, except during a scan.** All read endpoints must operate exclusively on the database index. The filesystem is only accessed during:
- **Scan operations** (`/system/scan`) — to discover files, compute hashes, and sync the index.
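- **Configuration endpoints** (`/system/ls`) — to help users pick source roots during setup. Note that `/system/browse` is index-only: it never falls back to the live filesystem, even for paths that are outside the configured roots or not yet scanned.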
Browsing the archive, searching, or checking protection status must use the index only. This guarantees consistent results even when files are temporarily inaccessible, and prevents I/O bottlenecks on network or tape-backed storage.
### Shared Helpers (`app/api/common.py`)
Cross-cutting helpers and schemas that must not create circular imports:
+70 -93
View File
@@ -13,7 +13,6 @@ from app.api.common import (
)
from sqlalchemy import text
from app.db import models
import os
router = APIRouter(tags=["System"])
@@ -24,7 +23,11 @@ router = APIRouter(tags=["System"])
def browse_system_path(
path: Optional[str] = None, db_session: Session = Depends(get_db)
):
"""Provides a browsable view of the indexed filesystem from the database."""
"""Provides a browsable view of the indexed filesystem from the database.
Operates exclusively on the database index (index-only principle).
Never falls back to the live filesystem.
"""
roots = get_source_roots(db_session)
tracking_rules = db_session.query(models.TrackedSource).all()
tracking_map = {rule.path: rule.action for rule in tracking_rules}
@@ -57,105 +60,79 @@ def browse_system_path(
target_prefix.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
)
files_sql = text("""
# --- Files directly under this path (non-recursive) ---
file_sql = text("""
SELECT file_path, size, mtime, sha256_hash, is_ignored
FROM filesystem_state
WHERE file_path LIKE :prefix ESCAPE '\\'
AND file_path != :prefix
WHERE file_path LIKE :prefix_wildcard ESCAPE '\\'
AND file_path NOT LIKE :prefix_nested ESCAPE '\\'
AND file_path != :prefix
""")
rows = db_session.execute(files_sql, {"prefix": f"{escaped_prefix}%"}).fetchall()
file_rows = db_session.execute(
file_sql,
{
"prefix": target_prefix,
"prefix_wildcard": f"{escaped_prefix}%",
"prefix_nested": f"{escaped_prefix}%/%",
},
).fetchall()
if not rows and os.path.isdir(path):
try:
live_results = []
with os.scandir(path) as it:
for entry in it:
try:
if entry.name.startswith("."):
continue
entry_path = entry.path
is_dir = entry.is_dir()
is_ignored = get_ignored_status(
entry_path + "/" if is_dir else entry_path,
tracking_map,
exclusion_spec,
)
if is_dir:
live_results.append(
FileItemSchema(
name=entry.name,
path=entry_path,
type="directory",
ignored=is_ignored,
)
)
else:
stat = entry.stat()
live_results.append(
FileItemSchema(
name=entry.name,
path=entry_path,
type="file",
size=stat.st_size,
mtime=stat.st_mtime,
ignored=is_ignored,
sha256_hash=None,
)
)
except OSError:
continue
live_results.sort(key=lambda x: (x.type != "directory", x.name.lower()))
return BrowseResponseSchema(
files=live_results, last_scan_time=last_scan_time
results: list[FileItemSchema] = []
seen: set[str] = set()
for file_path, size, mtime, sha256_hash, is_ignored in file_rows:
if file_path not in seen:
seen.add(file_path)
results.append(
FileItemSchema(
name=file_path.split("/")[-1],
path=file_path,
type="file",
size=size,
mtime=mtime,
ignored=is_ignored,
sha256_hash=sha256_hash,
)
)
except OSError:
pass
# Aggregate sizes for directories from indexed rows
dir_sizes: dict[str, int] = {}
for file_path, size, _mtime, _sha256_hash, _is_ignored in rows:
relative = file_path[len(target_prefix) :]
if "/" in relative:
immediate_name = relative.split("/")[0]
child_path = target_prefix + immediate_name
dir_sizes[child_path] = dir_sizes.get(child_path, 0) + (size or 0)
# --- Directories under this path (aggregated via GROUP BY) ---
dir_sql = text("""
SELECT
SUBSTR(file_path, LENGTH(:prefix) + 1,
INSTR(SUBSTR(file_path, LENGTH(:prefix) + 1), '/') - 1) as dir_name,
SUM(size) as total_size
FROM filesystem_state
WHERE file_path LIKE :prefix_wildcard ESCAPE '\\'
AND file_path != :prefix
AND INSTR(SUBSTR(file_path, LENGTH(:prefix) + 1), '/') > 0
GROUP BY dir_name
""")
dir_rows = db_session.execute(
dir_sql,
{
"prefix": target_prefix,
"prefix_wildcard": f"{escaped_prefix}%",
},
).fetchall()
results = []
seen = set()
for file_path, size, mtime, sha256_hash, is_ignored in rows:
relative = file_path[len(target_prefix) :]
if "/" in relative:
immediate_name = relative.split("/")[0]
child_path = target_prefix + immediate_name
if child_path not in seen:
seen.add(child_path)
dir_ignored = get_ignored_status(
child_path + "/", tracking_map, exclusion_spec
)
results.append(
FileItemSchema(
name=immediate_name,
path=child_path,
type="directory",
size=dir_sizes.get(child_path, 0),
ignored=dir_ignored,
)
)
else:
if file_path not in seen:
seen.add(file_path)
results.append(
FileItemSchema(
name=relative,
path=file_path,
type="file",
size=size,
mtime=mtime,
ignored=is_ignored,
sha256_hash=sha256_hash,
)
for dir_name, total_size in dir_rows:
if not dir_name or dir_name == "/":
continue
child_path = target_prefix + dir_name
if child_path not in seen:
seen.add(child_path)
dir_ignored = get_ignored_status(
child_path + "/", tracking_map, exclusion_spec
)
results.append(
FileItemSchema(
name=dir_name,
path=child_path,
type="directory",
size=total_size or 0,
ignored=dir_ignored,
)
)
results.sort(key=lambda x: (x.type != "directory", x.name.lower()))
return BrowseResponseSchema(files=results, last_scan_time=last_scan_time)
+2 -2
View File
@@ -38,13 +38,13 @@ export default defineConfig({
{
command: 'cd ../backend && rm -f e2e_test.db* && DATABASE_URL="sqlite:///e2e_test.db" TAPEHOARD_TEST_MODE="true" TAPEHOARD_CORS_ORIGINS="*,http://localhost:5174,http://127.0.0.1:5174" uv run python -m app.start_test_server --host 127.0.0.1 --port 8001',
url: 'http://127.0.0.1:8001/health',
reuseExistingServer: !process.env.CI,
reuseExistingServer: false,
timeout: 120 * 1000,
},
{
command: 'VITE_API_URL=http://127.0.0.1:8001 npm run dev -- --port 5174',
url: 'http://localhost:5174',
reuseExistingServer: !process.env.CI,
reuseExistingServer: false,
timeout: 120 * 1000,
},
],
+17
View File
@@ -44,6 +44,23 @@ test.describe('TapeHoard Golden Path', () => {
await requestContext.post(`${API_URL}/system/settings`, {
data: { key: 'restore_destinations', value: JSON.stringify([RESTORE_DEST]) }
});
// Index-only principle: scan first so /system/browse can show files
const scanResp = await requestContext.post(`${API_URL}/system/scan`);
if (!scanResp.ok()) {
console.error('Failed to trigger initial scan');
}
// Wait for scan to complete
const deadline = Date.now() + 30000;
while (Date.now() < deadline) {
const statusResp = await requestContext.get(`${API_URL}/system/scan/status`);
const status = await statusResp.json();
if (!status.is_running) {
break;
}
await new Promise(r => setTimeout(r, 500));
}
await requestContext.dispose();
});