exclusion list apply logic
Continuous Integration / backend-tests (push) Successful in 49s
Continuous Integration / frontend-check (push) Successful in 25s
Continuous Integration / e2e-tests (push) Successful in 6m14s

This commit is contained in:
2026-05-04 20:16:47 -04:00
parent 699bc415fb
commit 078ea8931b
10 changed files with 428 additions and 10 deletions
+13 -3
View File
@@ -92,10 +92,11 @@ All FastAPI route handlers must declare explicit `operation_id` to control the g
Generated from the backend OpenAPI spec using `@hey-api/openapi-ts`:
```bash
cd backend && uv run python -c "import json; from app.main import app; json.dump(app.openapi(), open('openapi.json', 'w'))"
cd ../frontend && npx @hey-api/openapi-ts -i ../backend/openapi.json -o src/lib/api
just generate-client
```
This runs the full pipeline: it exports the OpenAPI spec by importing the FastAPI app directly (no server needs to be running) and regenerates the TypeScript SDK in `frontend/src/lib/api/`. Use this **after any backend change** that adds, renames, or modifies endpoints or schemas.
The generated SDK exports clean camelCase functions (e.g., `getDashboardStats`, `listJobs`, `triggerScan`).
**Rule:** After renaming any backend handler or changing an `operation_id`, regenerate the SDK and update all frontend imports. The old verbose names will cause TypeScript errors.
@@ -151,10 +152,19 @@ On macOS, `localhost` resolves to `::1` (IPv6) by default, but uvicorn may bind
7. Add backend tests in `backend/tests/test_api_system.py` (or a new test file if it's a new domain).
8. Run `just lint` before finishing.
### Regenerating the OpenAPI Spec
### Regenerating the OpenAPI Spec / TypeScript SDK
Use the convenience command:
```bash
just generate-client
```
Or run the steps manually:
```bash
cd backend && uv run python -c "import json; from app.main import app; json.dump(app.openapi(), open('openapi.json', 'w'), indent=2)"
cd ../frontend && npx @hey-api/openapi-ts -i ../backend/openapi.json -o src/lib/api
```
### Verifying No Auto-Generated operationIds
@@ -0,0 +1,56 @@
"""add is_ignored_by_policy to filesystem_state
Revision ID: 806e933ac89b
Revises: 349e61f9e856
Create Date: 2026-05-04 19:34:38.280865
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = "806e933ac89b"
down_revision: Union[str, Sequence[str], None] = "349e61f9e856"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Upgrade schema: add `is_ignored_by_policy`, index it, and backfill it."""
    bind = op.get_bind()
    insp = sa.inspect(bind)

    # The column may already exist (e.g. applied by a manual migration);
    # only add it when missing so the migration stays idempotent.
    existing_columns = {col["name"] for col in insp.get_columns("filesystem_state")}
    if "is_ignored_by_policy" not in existing_columns:
        op.add_column(
            "filesystem_state",
            sa.Column(
                "is_ignored_by_policy", sa.Boolean(), nullable=False, server_default="0"
            ),
        )

    # Index the flag so policy-based filtering is cheap; skip if already present.
    existing_indexes = {ix["name"] for ix in insp.get_indexes("filesystem_state")}
    if "ix_filesystem_state_is_ignored_by_policy" not in existing_indexes:
        op.create_index(
            "ix_filesystem_state_is_ignored_by_policy",
            "filesystem_state",
            ["is_ignored_by_policy"],
            unique=False,
        )

    # Backfill: seed the new policy flag from the current effective is_ignored value.
    op.execute("UPDATE filesystem_state SET is_ignored_by_policy = is_ignored")
def downgrade() -> None:
    """Downgrade schema: drop the policy index and column.

    Mirrors the idempotent style of upgrade(): the original downgrade raised if
    the index or column was absent (e.g. when upgrade() skipped creating them
    because a manual migration had already run), so we check existence first.
    """
    conn = op.get_bind()
    inspector = sa.inspect(conn)

    indexes = [idx["name"] for idx in inspector.get_indexes("filesystem_state")]
    if "ix_filesystem_state_is_ignored_by_policy" in indexes:
        op.drop_index(
            "ix_filesystem_state_is_ignored_by_policy", table_name="filesystem_state"
        )

    columns = [c["name"] for c in inspector.get_columns("filesystem_state")]
    if "is_ignored_by_policy" in columns:
        op.drop_column("filesystem_state", "is_ignored_by_policy")
+36
View File
@@ -5,6 +5,7 @@ from typing import Dict, List, Optional
import pathspec
from pydantic import BaseModel, ConfigDict
from sqlalchemy import text
from sqlalchemy.orm import Session
from app.db import models
@@ -82,6 +83,41 @@ def get_ignored_status(
return False
def get_ignored_by_policy(
    absolute_path: str,
    exclusion_spec: "Optional[pathspec.PathSpec]",
) -> bool:
    """Return True when *absolute_path* is excluded by the global policy alone.

    Manual tracking rules are deliberately ignored here; callers combine this
    with tracking state to derive the effective ignored flag.
    """
    # No spec (or an empty one) means the policy excludes nothing.
    if not exclusion_spec:
        return False
    return bool(exclusion_spec.match_file(absolute_path))
def recompute_exclusion_policy(db_session: Session) -> None:
    """Recompute is_ignored_by_policy and effective is_ignored for all indexed files.

    Invoked after the global exclusion patterns change so existing rows reflect
    the new policy without requiring a fresh filesystem scan.

    Fix: the previous implementation claimed to update "in batches" but issued
    one UPDATE round-trip per row; this version computes all parameters first
    and sends a single executemany-style execute().
    """
    exclusion_spec = get_exclusion_spec(db_session)
    tracking_rules = db_session.query(models.TrackedSource).all()
    tracking_map = {rule.path: rule.action for rule in tracking_rules}

    all_files = db_session.query(
        models.FilesystemState.id, models.FilesystemState.file_path
    ).all()

    # One parameter dict per row; passing the whole list to execute() lets the
    # driver run the statement as a single executemany batch.
    params = [
        {
            "policy": get_ignored_by_policy(file_path, exclusion_spec),
            "ignored": get_ignored_status(file_path, tracking_map, exclusion_spec),
            "id": file_id,
        }
        for file_id, file_path in all_files
    ]
    if params:
        db_session.execute(
            text(
                "UPDATE filesystem_state SET is_ignored_by_policy = :policy, is_ignored = :ignored WHERE id = :id"
            ),
            params,
        )
    db_session.commit()
def _validate_path_within_roots(path: str, roots: List[str]) -> bool:
"""Validates that a path does not contain traversal sequences and is within configured roots."""
if ".." in path:
+4 -2
View File
@@ -76,7 +76,9 @@ def browse_system_path(
entry_path = entry.path
is_dir = entry.is_dir()
is_ignored = get_ignored_status(
entry_path, tracking_map, exclusion_spec
entry_path + "/" if is_dir else entry_path,
tracking_map,
exclusion_spec,
)
if is_dir:
live_results.append(
@@ -129,7 +131,7 @@ def browse_system_path(
if child_path not in seen:
seen.add(child_path)
dir_ignored = get_ignored_status(
child_path, tracking_map, exclusion_spec
child_path + "/", tracking_map, exclusion_spec
)
results.append(
FileItemSchema(
+5 -1
View File
@@ -8,7 +8,7 @@ from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from sqlalchemy.orm import Session
from app.api.common import FileItemSchema, SettingSchema
from app.api.common import FileItemSchema, SettingSchema, recompute_exclusion_policy
from app.db import models
from app.db.database import get_db
@@ -57,6 +57,10 @@ def update_settings(setting_data: SettingSchema, db_session: Session = Depends(g
scheduler_manager.reload()
# Recompute exclusion policy when global exclusions change
if setting_data.key == "global_exclusions":
recompute_exclusion_policy(db_session)
return {"message": "Setting committed."}
+4 -1
View File
@@ -21,7 +21,10 @@ class FilesystemState(Base):
)
is_ignored: Mapped[bool] = mapped_column(
Boolean, default=False
) # True if matches exclusion
) # Effective ignored state (manual OR policy, with manual override)
is_ignored_by_policy: Mapped[bool] = mapped_column(
Boolean, default=False
) # True if excluded by global policy (excludes manual tracking rules)
is_deleted: Mapped[bool] = mapped_column(
Boolean, default=False
) # True if confirmed missing from disk
File diff suppressed because one or more lines are too long
+30 -2
View File
@@ -676,6 +676,32 @@ export type TestExclusionsRequest = {
limit?: number;
};
/**
* TestExclusionsResponse
*/
export type TestExclusionsResponse = {
/**
* Total Files
*/
total_files: number;
/**
* Total Size
*/
total_size: number;
/**
* Matched Count
*/
matched_count: number;
/**
* Matched Size
*/
matched_size: number;
/**
* Sample
*/
sample: Array<FileItemSchema>;
};
/**
* TestNotificationRequest
*/
@@ -1185,7 +1211,7 @@ export type GetSettingsResponses = {
* Successful Response
*/
200: {
[key: string]: unknown;
[key: string]: string;
};
};
@@ -1234,9 +1260,11 @@ export type TestExclusionsResponses = {
/**
* Successful Response
*/
200: unknown;
200: TestExclusionsResponse;
};
export type TestExclusionsResponse2 = TestExclusionsResponses[keyof TestExclusionsResponses];
export type DownloadExclusionReportData = {
body: TestExclusionsRequest;
path?: never;
+259
View File
@@ -0,0 +1,259 @@
import { test, expect } from '@playwright/test';
import fs from 'fs';
import path from 'path';
import { API_URL, SOURCE_ROOT, setupRequestContext, triggerScanAndWait } from './helpers';
// End-to-end coverage for the global exclusion policy: pattern-based ignoring,
// manual-tracking overrides, live recompute on settings changes, the dry-run
// preview endpoint, and the CSV report download.
test.describe('Exclusion Policy', () => {
  // Build a small fixture tree under SOURCE_ROOT before every test:
  // two .txt and two .tmp files at the root, a docs/ dir with one of each,
  // and a temp/ dir containing only a .tmp file.
  test.beforeEach(async () => {
    fs.mkdirSync(SOURCE_ROOT, { recursive: true });
    fs.mkdirSync(path.join(SOURCE_ROOT, 'docs'), { recursive: true });
    fs.mkdirSync(path.join(SOURCE_ROOT, 'temp'), { recursive: true });
    fs.writeFileSync(path.join(SOURCE_ROOT, 'readme.txt'), 'hello');
    fs.writeFileSync(path.join(SOURCE_ROOT, 'notes.txt'), 'world');
    fs.writeFileSync(path.join(SOURCE_ROOT, 'data.tmp'), 'temp1');
    fs.writeFileSync(path.join(SOURCE_ROOT, 'cache.tmp'), 'temp2');
    fs.writeFileSync(path.join(SOURCE_ROOT, 'docs', 'guide.txt'), 'guide');
    fs.writeFileSync(path.join(SOURCE_ROOT, 'docs', 'draft.tmp'), 'draft');
    fs.writeFileSync(path.join(SOURCE_ROOT, 'temp', 'scratch.tmp'), 'scratch');
  });

  // NOTE(review): the `page` fixture is unused throughout this suite (all work
  // goes through the API request context) — it could be dropped from signatures.
  test('global exclusions mark matching files as ignored', async ({ page }) => {
    const requestContext = await setupRequestContext();
    // Point the scanner at the fixture tree and exclude *.tmp globally.
    await requestContext.post(`${API_URL}/system/settings`, {
      data: { key: 'source_roots', value: JSON.stringify([SOURCE_ROOT]) }
    });
    await requestContext.post(`${API_URL}/system/settings`, {
      data: { key: 'global_exclusions', value: '*.tmp\n' }
    });
    await triggerScanAndWait(requestContext);
    const browseResp = await requestContext.get(
      `${API_URL}/system/browse?path=${SOURCE_ROOT}`
    );
    const browseData = await browseResp.json();
    const files = (browseData as any).files;
    const tmpFiles = (files as Array<any>).filter((f: any) => f.name.endsWith('.tmp'));
    const txtFiles = (files as Array<any>).filter((f: any) => f.name.endsWith('.txt'));
    // Every .tmp file must be ignored; every .txt file must not be.
    expect(tmpFiles.length).toBeGreaterThan(0);
    tmpFiles.forEach((f: any) => {
      expect(f.ignored, `expected ${f.name} to be ignored`).toBe(true);
    });
    txtFiles.forEach((f: any) => {
      expect(f.ignored, `expected ${f.name} to NOT be ignored`).toBe(false);
    });
    await requestContext.dispose();
  });

  test('manual include overrides global exclusion', async ({ page }) => {
    const requestContext = await setupRequestContext();
    await requestContext.post(`${API_URL}/system/settings`, {
      data: { key: 'source_roots', value: JSON.stringify([SOURCE_ROOT]) }
    });
    await requestContext.post(`${API_URL}/system/settings`, {
      data: { key: 'global_exclusions', value: '*.tmp\n' }
    });
    // Include one specific .tmp file despite the global exclusion
    await requestContext.post(`${API_URL}/system/track/batch`, {
      data: {
        tracks: [path.join(SOURCE_ROOT, 'data.tmp')],
        untracks: []
      }
    });
    await triggerScanAndWait(requestContext);
    const browseResp = await requestContext.get(
      `${API_URL}/system/browse?path=${SOURCE_ROOT}`
    );
    const browseData = await browseResp.json();
    const files = (browseData as any).files;
    const dataTmp = (files as Array<any>).find((f: any) => f.name === 'data.tmp');
    const cacheTmp = (files as Array<any>).find((f: any) => f.name === 'cache.tmp');
    // data.tmp was manually tracked, so the policy exclusion must not apply to
    // it; cache.tmp has no manual rule and stays excluded.
    expect(dataTmp).toBeDefined();
    expect(dataTmp.ignored).toBe(false);
    expect(cacheTmp).toBeDefined();
    expect(cacheTmp.ignored).toBe(true);
    await requestContext.dispose();
  });

  test('updating global exclusions recomputes existing indexed files', async ({ page }) => {
    const requestContext = await setupRequestContext();
    await requestContext.post(`${API_URL}/system/settings`, {
      data: { key: 'source_roots', value: JSON.stringify([SOURCE_ROOT]) }
    });
    // No exclusions initially
    await requestContext.post(`${API_URL}/system/settings`, {
      data: { key: 'global_exclusions', value: '' }
    });
    await triggerScanAndWait(requestContext);
    // Verify nothing is ignored before exclusions are set
    const browseBefore = await requestContext.get(
      `${API_URL}/system/browse?path=${SOURCE_ROOT}`
    );
    const beforeData = await browseBefore.json();
    const beforeFiles = (beforeData as any).files as Array<any>;
    beforeFiles.forEach((f: any) => {
      expect(f.ignored, `expected ${f.name} to NOT be ignored before policy`).toBe(false);
    });
    // Now apply global exclusions — should recompute without requiring a new scan
    await requestContext.post(`${API_URL}/system/settings`, {
      data: { key: 'global_exclusions', value: '*.tmp\n' }
    });
    // Deliberately no triggerScanAndWait here: the settings endpoint itself
    // must recompute the flags on the already-indexed rows.
    const browseAfter = await requestContext.get(
      `${API_URL}/system/browse?path=${SOURCE_ROOT}`
    );
    const afterData = await browseAfter.json();
    const afterFiles = (afterData as any).files as Array<any>;
    const tmpAfter = afterFiles.filter((f: any) => f.name.endsWith('.tmp'));
    const txtAfter = afterFiles.filter((f: any) => f.name.endsWith('.txt'));
    tmpAfter.forEach((f: any) => {
      expect(f.ignored, `expected ${f.name} to be ignored after policy update`).toBe(true);
    });
    txtAfter.forEach((f: any) => {
      expect(f.ignored, `expected ${f.name} to NOT be ignored after policy update`).toBe(false);
    });
    await requestContext.dispose();
  });

  test('exclusion preview returns correct counts and sample', async ({ page }) => {
    const requestContext = await setupRequestContext();
    await requestContext.post(`${API_URL}/system/settings`, {
      data: { key: 'source_roots', value: JSON.stringify([SOURCE_ROOT]) }
    });
    await triggerScanAndWait(requestContext);
    // Dry-run the pattern against the index without persisting anything.
    const previewResp = await requestContext.post(`${API_URL}/system/settings/test-exclusions`, {
      data: { patterns: '*.tmp', limit: 10 }
    });
    expect(previewResp.ok()).toBe(true);
    const preview = await previewResp.json();
    expect(preview.total_files).toBeGreaterThan(0);
    expect(preview.matched_count).toBeGreaterThan(0);
    expect(preview.matched_size).toBeGreaterThanOrEqual(0);
    expect(Array.isArray(preview.sample)).toBe(true);
    expect(preview.sample.length).toBeGreaterThan(0);
    // The sample honors the requested limit and only contains matches.
    expect(preview.sample.length).toBeLessThanOrEqual(10);
    preview.sample.forEach((s: any) => {
      expect(s.name.endsWith('.tmp')).toBe(true);
      expect(s.path).toBeDefined();
      expect(s.size).toBeDefined();
    });
    await requestContext.dispose();
  });

  test('exclusion preview with no patterns returns empty result', async ({ page }) => {
    const requestContext = await setupRequestContext();
    await requestContext.post(`${API_URL}/system/settings`, {
      data: { key: 'source_roots', value: JSON.stringify([SOURCE_ROOT]) }
    });
    await triggerScanAndWait(requestContext);
    const previewResp = await requestContext.post(`${API_URL}/system/settings/test-exclusions`, {
      data: { patterns: '', limit: 10 }
    });
    // An empty pattern set matches nothing and reports zero totals.
    expect(previewResp.ok()).toBe(true);
    const preview = await previewResp.json();
    expect(preview.total_files).toBe(0);
    expect(preview.matched_count).toBe(0);
    expect(preview.matched_size).toBe(0);
    expect(preview.sample).toEqual([]);
    await requestContext.dispose();
  });

  test('exclusion CSV download contains matched files', async ({ page }) => {
    const requestContext = await setupRequestContext();
    await requestContext.post(`${API_URL}/system/settings`, {
      data: { key: 'source_roots', value: JSON.stringify([SOURCE_ROOT]) }
    });
    await triggerScanAndWait(requestContext);
    const downloadResp = await requestContext.post(
      `${API_URL}/system/settings/test-exclusions/download`,
      { data: { patterns: '*.tmp' } }
    );
    expect(downloadResp.ok()).toBe(true);
    const contentType = downloadResp.headers()['content-type'];
    expect(contentType).toContain('text/csv');
    const body = await downloadResp.text();
    // CSV must carry the expected header row plus matched-file rows.
    expect(body).toContain('path,size,mtime,sha256_hash');
    expect(body).toContain('.tmp');
    const lines = body.trim().split('\n');
    expect(lines.length).toBeGreaterThan(1); // header + at least one row
    await requestContext.dispose();
  });

  test('directory-level global exclusion ignores nested files', async ({ page }) => {
    const requestContext = await setupRequestContext();
    await requestContext.post(`${API_URL}/system/settings`, {
      data: { key: 'source_roots', value: JSON.stringify([SOURCE_ROOT]) }
    });
    // gitignore-style trailing slash targets the directory itself.
    await requestContext.post(`${API_URL}/system/settings`, {
      data: { key: 'global_exclusions', value: 'temp/\n' }
    });
    await triggerScanAndWait(requestContext);
    const browseRoot = await requestContext.get(
      `${API_URL}/system/browse?path=${SOURCE_ROOT}`
    );
    const rootData = await browseRoot.json();
    const rootFiles = (rootData as any).files as Array<any>;
    const tempDir = rootFiles.find((f: any) => f.name === 'temp');
    expect(tempDir).toBeDefined();
    expect(tempDir.ignored).toBe(true);
    // Files inside temp should also be ignored
    const browseTemp = await requestContext.get(
      `${API_URL}/system/browse?path=${path.join(SOURCE_ROOT, 'temp')}`
    );
    const tempData = await browseTemp.json();
    const tempFiles = (tempData as any).files as Array<any>;
    tempFiles.forEach((f: any) => {
      expect(f.ignored, `expected ${f.name} inside temp/ to be ignored`).toBe(true);
    });
    // Files outside temp should NOT be ignored
    const readme = rootFiles.find((f: any) => f.name === 'readme.txt');
    expect(readme).toBeDefined();
    expect(readme.ignored).toBe(false);
    await requestContext.dispose();
  });
});
+20
View File
@@ -64,12 +64,25 @@ db-migrate message:
# --- Code Generation ---
# Export the OpenAPI spec JSON without regenerating the TypeScript client.
# Writes the spec to /tmp/tapehoard_openapi.json via the backend helper script.
export-openapi:
    @echo "Exporting OpenAPI spec..."
    @cd backend && uv run python scripts/generate_openapi.py /tmp/tapehoard_openapi.json
# Generate the TypeScript API client from the FastAPI OpenAPI spec.
# Exports the spec first, then runs @hey-api/openapi-ts against it.
# NOTE(review): depends on db-upgrade — presumably so importing the app sees a
# current schema; confirm db-upgrade's definition.
generate-client: db-upgrade
    @echo "Generating TypeScript API client..."
    @cd backend && uv run python scripts/generate_openapi.py /tmp/tapehoard_openapi.json
    @cd frontend && npx @hey-api/openapi-ts -i /tmp/tapehoard_openapi.json -o src/lib/api -c @hey-api/client-fetch
# Full regeneration workflow after schema changes: migrate, upgrade, generate client, lint.
# Autogenerates an Alembic revision from model changes (named by {{message}}),
# applies it, regenerates the TypeScript SDK, then lints everything.
# NOTE(review): db-upgrade runs as a dependency AND `alembic upgrade head` runs
# again after the new revision is created — the first brings the DB to head so
# autogenerate diffs cleanly, the second applies the freshly generated revision.
regenerate message: db-upgrade
    @echo "Running full regeneration workflow..."
    cd backend && uv run alembic revision --autogenerate -m "{{message}}"
    cd backend && uv run alembic upgrade head
    @just generate-client
    @just lint
# --- Docker ---
# Build the production Docker image
@@ -97,3 +110,10 @@ playwright:
# Launch the Playwright interactive test-runner UI for the frontend e2e suite.
# Fix: the echoed message misspelled the tool name ("playweight").
playwright-ui:
    @echo "Starting Playwright UI..."
    cd frontend && npx playwright test --ui
# Clean test artifacts and kill stale test servers.
# pkill exits non-zero when no process matches, so `|| true` keeps the recipe
# from failing on a clean machine; rm -f / -rf tolerate missing paths likewise.
clean-test:
    @echo "Cleaning test artifacts..."
    pkill -f "start_test_server" 2>/dev/null || true
    rm -f backend/e2e_test.db backend/e2e_test.db-*
    rm -rf frontend/test-results/