Compare commits
2 Commits
078ea8931b
...
1ff21e3c2c
| Author | SHA1 | Date | |
|---|---|---|---|
| 1ff21e3c2c | |||
| 544bd14cbb |
@@ -50,6 +50,15 @@ All API routes live under `app/api/`. The `system` endpoints are split into a pa
|
||||
|
||||
Each module defines its own `APIRouter` with `tags=["System"]` and is registered in `main.py` with `prefix="/system"`.
|
||||
|
||||
### Index-Only Principle
|
||||
|
||||
**Never rely on the live filesystem for data, except during a scan.** All read endpoints must operate exclusively on the database index. The filesystem is accessed only during:
|
||||
|
||||
- **Scan operations** (`/system/scan`) — to discover files, compute hashes, and sync the index.
|
||||
- **Configuration endpoints** (`/system/ls`) — to help users pick source roots during setup. `/system/browse` is not one of them: it reads only the database index and never falls back to the live filesystem.
|
||||
|
||||
Browsing the archive, searching, or checking protection status must use the index only. This guarantees consistent results even when files are temporarily inaccessible, and prevents I/O bottlenecks on network or tape-backed storage.
|
||||
|
||||
### Shared Helpers (`app/api/common.py`)
|
||||
|
||||
Cross-cutting helpers and schemas that must not create circular imports:
|
||||
|
||||
@@ -13,7 +13,6 @@ from app.api.common import (
|
||||
)
|
||||
from sqlalchemy import text
|
||||
from app.db import models
|
||||
import os
|
||||
|
||||
router = APIRouter(tags=["System"])
|
||||
|
||||
@@ -24,7 +23,11 @@ router = APIRouter(tags=["System"])
|
||||
def browse_system_path(
|
||||
path: Optional[str] = None, db_session: Session = Depends(get_db)
|
||||
):
|
||||
"""Provides a browsable view of the indexed filesystem from the database."""
|
||||
"""Provides a browsable view of the indexed filesystem from the database.
|
||||
|
||||
Operates exclusively on the database index (index-only principle).
|
||||
Never falls back to the live filesystem.
|
||||
"""
|
||||
roots = get_source_roots(db_session)
|
||||
tracking_rules = db_session.query(models.TrackedSource).all()
|
||||
tracking_map = {rule.path: rule.action for rule in tracking_rules}
|
||||
@@ -57,105 +60,79 @@ def browse_system_path(
|
||||
target_prefix.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
|
||||
)
|
||||
|
||||
files_sql = text("""
|
||||
# --- Files directly under this path (non-recursive) ---
|
||||
file_sql = text("""
|
||||
SELECT file_path, size, mtime, sha256_hash, is_ignored
|
||||
FROM filesystem_state
|
||||
WHERE file_path LIKE :prefix ESCAPE '\\'
|
||||
AND file_path != :prefix
|
||||
WHERE file_path LIKE :prefix_wildcard ESCAPE '\\'
|
||||
AND file_path NOT LIKE :prefix_nested ESCAPE '\\'
|
||||
AND file_path != :prefix
|
||||
""")
|
||||
rows = db_session.execute(files_sql, {"prefix": f"{escaped_prefix}%"}).fetchall()
|
||||
file_rows = db_session.execute(
|
||||
file_sql,
|
||||
{
|
||||
"prefix": target_prefix,
|
||||
"prefix_wildcard": f"{escaped_prefix}%",
|
||||
"prefix_nested": f"{escaped_prefix}%/%",
|
||||
},
|
||||
).fetchall()
|
||||
|
||||
if not rows and os.path.isdir(path):
|
||||
try:
|
||||
live_results = []
|
||||
with os.scandir(path) as it:
|
||||
for entry in it:
|
||||
try:
|
||||
if entry.name.startswith("."):
|
||||
continue
|
||||
entry_path = entry.path
|
||||
is_dir = entry.is_dir()
|
||||
is_ignored = get_ignored_status(
|
||||
entry_path + "/" if is_dir else entry_path,
|
||||
tracking_map,
|
||||
exclusion_spec,
|
||||
)
|
||||
if is_dir:
|
||||
live_results.append(
|
||||
FileItemSchema(
|
||||
name=entry.name,
|
||||
path=entry_path,
|
||||
type="directory",
|
||||
ignored=is_ignored,
|
||||
)
|
||||
)
|
||||
else:
|
||||
stat = entry.stat()
|
||||
live_results.append(
|
||||
FileItemSchema(
|
||||
name=entry.name,
|
||||
path=entry_path,
|
||||
type="file",
|
||||
size=stat.st_size,
|
||||
mtime=stat.st_mtime,
|
||||
ignored=is_ignored,
|
||||
sha256_hash=None,
|
||||
)
|
||||
)
|
||||
except OSError:
|
||||
continue
|
||||
live_results.sort(key=lambda x: (x.type != "directory", x.name.lower()))
|
||||
return BrowseResponseSchema(
|
||||
files=live_results, last_scan_time=last_scan_time
|
||||
results: list[FileItemSchema] = []
|
||||
seen: set[str] = set()
|
||||
|
||||
for file_path, size, mtime, sha256_hash, is_ignored in file_rows:
|
||||
if file_path not in seen:
|
||||
seen.add(file_path)
|
||||
results.append(
|
||||
FileItemSchema(
|
||||
name=file_path.split("/")[-1],
|
||||
path=file_path,
|
||||
type="file",
|
||||
size=size,
|
||||
mtime=mtime,
|
||||
ignored=is_ignored,
|
||||
sha256_hash=sha256_hash,
|
||||
)
|
||||
)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
# Aggregate sizes for directories from indexed rows
|
||||
dir_sizes: dict[str, int] = {}
|
||||
for file_path, size, _mtime, _sha256_hash, _is_ignored in rows:
|
||||
relative = file_path[len(target_prefix) :]
|
||||
if "/" in relative:
|
||||
immediate_name = relative.split("/")[0]
|
||||
child_path = target_prefix + immediate_name
|
||||
dir_sizes[child_path] = dir_sizes.get(child_path, 0) + (size or 0)
|
||||
# --- Directories under this path (aggregated via GROUP BY) ---
|
||||
dir_sql = text("""
|
||||
SELECT
|
||||
SUBSTR(file_path, LENGTH(:prefix) + 1,
|
||||
INSTR(SUBSTR(file_path, LENGTH(:prefix) + 1), '/') - 1) as dir_name,
|
||||
SUM(size) as total_size
|
||||
FROM filesystem_state
|
||||
WHERE file_path LIKE :prefix_wildcard ESCAPE '\\'
|
||||
AND file_path != :prefix
|
||||
AND INSTR(SUBSTR(file_path, LENGTH(:prefix) + 1), '/') > 0
|
||||
GROUP BY dir_name
|
||||
""")
|
||||
dir_rows = db_session.execute(
|
||||
dir_sql,
|
||||
{
|
||||
"prefix": target_prefix,
|
||||
"prefix_wildcard": f"{escaped_prefix}%",
|
||||
},
|
||||
).fetchall()
|
||||
|
||||
results = []
|
||||
seen = set()
|
||||
|
||||
for file_path, size, mtime, sha256_hash, is_ignored in rows:
|
||||
relative = file_path[len(target_prefix) :]
|
||||
if "/" in relative:
|
||||
immediate_name = relative.split("/")[0]
|
||||
child_path = target_prefix + immediate_name
|
||||
if child_path not in seen:
|
||||
seen.add(child_path)
|
||||
dir_ignored = get_ignored_status(
|
||||
child_path + "/", tracking_map, exclusion_spec
|
||||
)
|
||||
results.append(
|
||||
FileItemSchema(
|
||||
name=immediate_name,
|
||||
path=child_path,
|
||||
type="directory",
|
||||
size=dir_sizes.get(child_path, 0),
|
||||
ignored=dir_ignored,
|
||||
)
|
||||
)
|
||||
else:
|
||||
if file_path not in seen:
|
||||
seen.add(file_path)
|
||||
results.append(
|
||||
FileItemSchema(
|
||||
name=relative,
|
||||
path=file_path,
|
||||
type="file",
|
||||
size=size,
|
||||
mtime=mtime,
|
||||
ignored=is_ignored,
|
||||
sha256_hash=sha256_hash,
|
||||
)
|
||||
for dir_name, total_size in dir_rows:
|
||||
if not dir_name or dir_name == "/":
|
||||
continue
|
||||
child_path = target_prefix + dir_name
|
||||
if child_path not in seen:
|
||||
seen.add(child_path)
|
||||
dir_ignored = get_ignored_status(
|
||||
child_path + "/", tracking_map, exclusion_spec
|
||||
)
|
||||
results.append(
|
||||
FileItemSchema(
|
||||
name=dir_name,
|
||||
path=child_path,
|
||||
type="directory",
|
||||
size=total_size or 0,
|
||||
ignored=dir_ignored,
|
||||
)
|
||||
)
|
||||
|
||||
results.sort(key=lambda x: (x.type != "directory", x.name.lower()))
|
||||
return BrowseResponseSchema(files=results, last_scan_time=last_scan_time)
|
||||
|
||||
@@ -38,13 +38,13 @@ export default defineConfig({
|
||||
{
|
||||
command: 'cd ../backend && rm -f e2e_test.db* && DATABASE_URL="sqlite:///e2e_test.db" TAPEHOARD_TEST_MODE="true" TAPEHOARD_CORS_ORIGINS="*,http://localhost:5174,http://127.0.0.1:5174" uv run python -m app.start_test_server --host 127.0.0.1 --port 8001',
|
||||
url: 'http://127.0.0.1:8001/health',
|
||||
reuseExistingServer: !process.env.CI,
|
||||
reuseExistingServer: false,
|
||||
timeout: 120 * 1000,
|
||||
},
|
||||
{
|
||||
command: 'VITE_API_URL=http://127.0.0.1:8001 npm run dev -- --port 5174',
|
||||
url: 'http://localhost:5174',
|
||||
reuseExistingServer: !process.env.CI,
|
||||
reuseExistingServer: false,
|
||||
timeout: 120 * 1000,
|
||||
},
|
||||
],
|
||||
|
||||
@@ -44,6 +44,23 @@ test.describe('TapeHoard Golden Path', () => {
|
||||
await requestContext.post(`${API_URL}/system/settings`, {
|
||||
data: { key: 'restore_destinations', value: JSON.stringify([RESTORE_DEST]) }
|
||||
});
|
||||
|
||||
// Index-only principle: scan first so /system/browse can show files
|
||||
const scanResp = await requestContext.post(`${API_URL}/system/scan`);
|
||||
if (!scanResp.ok()) {
|
||||
console.error('Failed to trigger initial scan');
|
||||
}
|
||||
// Wait for scan to complete
|
||||
const deadline = Date.now() + 30000;
|
||||
while (Date.now() < deadline) {
|
||||
const statusResp = await requestContext.get(`${API_URL}/system/scan/status`);
|
||||
const status = await statusResp.json();
|
||||
if (!status.is_running) {
|
||||
break;
|
||||
}
|
||||
await new Promise(r => setTimeout(r, 500));
|
||||
}
|
||||
|
||||
await requestContext.dispose();
|
||||
});
|
||||
|
||||
|
||||
Reference in New Issue
Block a user