exclusion list apply logic
Continuous Integration / backend-tests (push) Successful in 49s
Continuous Integration / frontend-check (push) Successful in 25s
Continuous Integration / e2e-tests (push) Successful in 6m14s

This commit is contained in:
2026-05-04 20:16:47 -04:00
parent 699bc415fb
commit 078ea8931b
10 changed files with 428 additions and 10 deletions
+13 -3
View File
@@ -92,10 +92,11 @@ All FastAPI route handlers must declare explicit `operation_id` to control the g
Generated from the backend OpenAPI spec using `@hey-api/openapi-ts`: Generated from the backend OpenAPI spec using `@hey-api/openapi-ts`:
```bash ```bash
cd backend && uv run python -c "import json; from app.main import app; json.dump(app.openapi(), open('openapi.json', 'w'))" just generate-client
cd ../frontend && npx @hey-api/openapi-ts -i ../backend/openapi.json -o src/lib/api
``` ```
This runs the full pipeline: exports the OpenAPI spec from the running FastAPI app and regenerates the TypeScript SDK in `frontend/src/lib/api/`. Use this **after any backend change** that adds, renames, or modifies endpoints or schemas.
The generated SDK exports clean camelCase functions (e.g., `getDashboardStats`, `listJobs`, `triggerScan`). The generated SDK exports clean camelCase functions (e.g., `getDashboardStats`, `listJobs`, `triggerScan`).
**Rule:** After renaming any backend handler or changing an `operation_id`, regenerate the SDK and update all frontend imports. The old verbose names will cause TypeScript errors. **Rule:** After renaming any backend handler or changing an `operation_id`, regenerate the SDK and update all frontend imports. The old verbose names will cause TypeScript errors.
@@ -151,10 +152,19 @@ On macOS, `localhost` resolves to `::1` (IPv6) by default, but uvicorn may bind
7. Add backend tests in `backend/tests/test_api_system.py` (or a new test file if it's a new domain). 7. Add backend tests in `backend/tests/test_api_system.py` (or a new test file if it's a new domain).
8. Run `just lint` before finishing. 8. Run `just lint` before finishing.
### Regenerating the OpenAPI Spec ### Regenerating the OpenAPI Spec / TypeScript SDK
Use the convenience command:
```bash
just generate-client
```
Or run the steps manually:
```bash ```bash
cd backend && uv run python -c "import json; from app.main import app; json.dump(app.openapi(), open('openapi.json', 'w'), indent=2)" cd backend && uv run python -c "import json; from app.main import app; json.dump(app.openapi(), open('openapi.json', 'w'), indent=2)"
cd ../frontend && npx @hey-api/openapi-ts -i ../backend/openapi.json -o src/lib/api
``` ```
### Verifying No Auto-Generated operationIds ### Verifying No Auto-Generated operationIds
@@ -0,0 +1,56 @@
"""add is_ignored_by_policy to filesystem_state
Revision ID: 806e933ac89b
Revises: 349e61f9e856
Create Date: 2026-05-04 19:34:38.280865
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision: str = "806e933ac89b"
down_revision: Union[str, Sequence[str], None] = "349e61f9e856"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Upgrade schema.

    Adds the ``is_ignored_by_policy`` boolean column and a supporting index
    to ``filesystem_state``, then backfills the new column from the existing
    ``is_ignored`` values. Both DDL steps are guarded by inspector checks so
    the migration is safe to re-run against databases where the column or
    index was already created manually.
    """
    # Check if column already exists (e.g. from manual migration)
    conn = op.get_bind()
    inspector = sa.inspect(conn)
    columns = [c["name"] for c in inspector.get_columns("filesystem_state")]
    if "is_ignored_by_policy" not in columns:
        # server_default="0" keeps NOT NULL satisfied for pre-existing rows.
        op.add_column(
            "filesystem_state",
            sa.Column(
                "is_ignored_by_policy", sa.Boolean(), nullable=False, server_default="0"
            ),
        )
    # Create index for efficient querying
    indexes = [idx["name"] for idx in inspector.get_indexes("filesystem_state")]
    if "ix_filesystem_state_is_ignored_by_policy" not in indexes:
        op.create_index(
            "ix_filesystem_state_is_ignored_by_policy",
            "filesystem_state",
            ["is_ignored_by_policy"],
            unique=False,
        )
    # Backfill: set is_ignored_by_policy = is_ignored for existing records
    op.execute("UPDATE filesystem_state SET is_ignored_by_policy = is_ignored")
def downgrade() -> None:
    """Downgrade schema.

    Mirrors the defensive existence checks in ``upgrade()``: the index and
    column are dropped only when they are actually present, so the downgrade
    cannot fail on a database where ``upgrade()`` skipped one of its guarded
    creation steps (e.g. the column pre-existed without the index).
    """
    conn = op.get_bind()
    inspector = sa.inspect(conn)
    # Drop the index first (it depends on the column), but only if present.
    indexes = [idx["name"] for idx in inspector.get_indexes("filesystem_state")]
    if "ix_filesystem_state_is_ignored_by_policy" in indexes:
        op.drop_index(
            "ix_filesystem_state_is_ignored_by_policy", table_name="filesystem_state"
        )
    columns = [c["name"] for c in inspector.get_columns("filesystem_state")]
    if "is_ignored_by_policy" in columns:
        op.drop_column("filesystem_state", "is_ignored_by_policy")
+36
View File
@@ -5,6 +5,7 @@ from typing import Dict, List, Optional
import pathspec import pathspec
from pydantic import BaseModel, ConfigDict from pydantic import BaseModel, ConfigDict
from sqlalchemy import text
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from app.db import models from app.db import models
@@ -82,6 +83,41 @@ def get_ignored_status(
return False return False
def get_ignored_by_policy(
    absolute_path: str,
    exclusion_spec: Optional[pathspec.PathSpec],
) -> bool:
    """Check whether a path falls under the global exclusion policy.

    Manual tracking rules are intentionally not consulted here; only the
    compiled global exclusion patterns decide the outcome.
    """
    if not exclusion_spec:
        # No policy configured (or an empty spec): nothing is excluded.
        return False
    return bool(exclusion_spec.match_file(absolute_path))
def recompute_exclusion_policy(db_session: Session) -> None:
    """Re-evaluate policy and effective ignored flags for every indexed file.

    Rebuilds the compiled global exclusion spec and the manual tracking-rule
    map, then rewrites ``is_ignored_by_policy`` (policy only) and
    ``is_ignored`` (effective, tracking rules applied) for each row in
    ``filesystem_state`` and commits the result.
    """
    spec = get_exclusion_spec(db_session)
    rule_map = {
        rule.path: rule.action
        for rule in db_session.query(models.TrackedSource).all()
    }
    # Prepare the statement once; it is reused for every row below.
    update_stmt = text(
        "UPDATE filesystem_state SET is_ignored_by_policy = :policy, is_ignored = :ignored WHERE id = :id"
    )
    rows = db_session.query(
        models.FilesystemState.id, models.FilesystemState.file_path
    ).all()
    for row_id, row_path in rows:
        db_session.execute(
            update_stmt,
            {
                "policy": get_ignored_by_policy(row_path, spec),
                "ignored": get_ignored_status(row_path, rule_map, spec),
                "id": row_id,
            },
        )
    db_session.commit()
def _validate_path_within_roots(path: str, roots: List[str]) -> bool: def _validate_path_within_roots(path: str, roots: List[str]) -> bool:
"""Validates that a path does not contain traversal sequences and is within configured roots.""" """Validates that a path does not contain traversal sequences and is within configured roots."""
if ".." in path: if ".." in path:
+4 -2
View File
@@ -76,7 +76,9 @@ def browse_system_path(
entry_path = entry.path entry_path = entry.path
is_dir = entry.is_dir() is_dir = entry.is_dir()
is_ignored = get_ignored_status( is_ignored = get_ignored_status(
entry_path, tracking_map, exclusion_spec entry_path + "/" if is_dir else entry_path,
tracking_map,
exclusion_spec,
) )
if is_dir: if is_dir:
live_results.append( live_results.append(
@@ -129,7 +131,7 @@ def browse_system_path(
if child_path not in seen: if child_path not in seen:
seen.add(child_path) seen.add(child_path)
dir_ignored = get_ignored_status( dir_ignored = get_ignored_status(
child_path, tracking_map, exclusion_spec child_path + "/", tracking_map, exclusion_spec
) )
results.append( results.append(
FileItemSchema( FileItemSchema(
+5 -1
View File
@@ -8,7 +8,7 @@ from fastapi.responses import StreamingResponse
from pydantic import BaseModel from pydantic import BaseModel
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from app.api.common import FileItemSchema, SettingSchema from app.api.common import FileItemSchema, SettingSchema, recompute_exclusion_policy
from app.db import models from app.db import models
from app.db.database import get_db from app.db.database import get_db
@@ -57,6 +57,10 @@ def update_settings(setting_data: SettingSchema, db_session: Session = Depends(g
scheduler_manager.reload() scheduler_manager.reload()
# Recompute exclusion policy when global exclusions change
if setting_data.key == "global_exclusions":
recompute_exclusion_policy(db_session)
return {"message": "Setting committed."} return {"message": "Setting committed."}
+4 -1
View File
@@ -21,7 +21,10 @@ class FilesystemState(Base):
) )
is_ignored: Mapped[bool] = mapped_column( is_ignored: Mapped[bool] = mapped_column(
Boolean, default=False Boolean, default=False
) # True if matches exclusion ) # Effective ignored state (manual OR policy, with manual override)
is_ignored_by_policy: Mapped[bool] = mapped_column(
Boolean, default=False
) # True if excluded by global policy (excludes manual tracking rules)
is_deleted: Mapped[bool] = mapped_column( is_deleted: Mapped[bool] = mapped_column(
Boolean, default=False Boolean, default=False
) # True if confirmed missing from disk ) # True if confirmed missing from disk
File diff suppressed because one or more lines are too long
+30 -2
View File
@@ -676,6 +676,32 @@ export type TestExclusionsRequest = {
limit?: number; limit?: number;
}; };
/**
* TestExclusionsResponse
*/
export type TestExclusionsResponse = {
/**
* Total Files
*/
total_files: number;
/**
* Total Size
*/
total_size: number;
/**
* Matched Count
*/
matched_count: number;
/**
* Matched Size
*/
matched_size: number;
/**
* Sample
*/
sample: Array<FileItemSchema>;
};
/** /**
* TestNotificationRequest * TestNotificationRequest
*/ */
@@ -1185,7 +1211,7 @@ export type GetSettingsResponses = {
* Successful Response * Successful Response
*/ */
200: { 200: {
[key: string]: unknown; [key: string]: string;
}; };
}; };
@@ -1234,9 +1260,11 @@ export type TestExclusionsResponses = {
/** /**
* Successful Response * Successful Response
*/ */
200: unknown; 200: TestExclusionsResponse;
}; };
export type TestExclusionsResponse2 = TestExclusionsResponses[keyof TestExclusionsResponses];
export type DownloadExclusionReportData = { export type DownloadExclusionReportData = {
body: TestExclusionsRequest; body: TestExclusionsRequest;
path?: never; path?: never;
+259
View File
@@ -0,0 +1,259 @@
import { test, expect } from '@playwright/test';
import fs from 'fs';
import path from 'path';
import { API_URL, SOURCE_ROOT, setupRequestContext, triggerScanAndWait } from './helpers';
// E2E coverage for the global exclusion policy feature: pattern-based
// ignoring, manual-include overrides, recomputation when settings change,
// the exclusion preview endpoint, CSV export, and directory-level patterns.
// NOTE(review): each test destructures `page` but drives the API directly
// through a request context — the fixture appears unused; confirm whether
// it is needed for Playwright setup before removing.
test.describe('Exclusion Policy', () => {
  // Build a small fixture tree under SOURCE_ROOT before every test:
  // mixed .txt / .tmp files at the root, in docs/, and in temp/.
  test.beforeEach(async () => {
    fs.mkdirSync(SOURCE_ROOT, { recursive: true });
    fs.mkdirSync(path.join(SOURCE_ROOT, 'docs'), { recursive: true });
    fs.mkdirSync(path.join(SOURCE_ROOT, 'temp'), { recursive: true });
    fs.writeFileSync(path.join(SOURCE_ROOT, 'readme.txt'), 'hello');
    fs.writeFileSync(path.join(SOURCE_ROOT, 'notes.txt'), 'world');
    fs.writeFileSync(path.join(SOURCE_ROOT, 'data.tmp'), 'temp1');
    fs.writeFileSync(path.join(SOURCE_ROOT, 'cache.tmp'), 'temp2');
    fs.writeFileSync(path.join(SOURCE_ROOT, 'docs', 'guide.txt'), 'guide');
    fs.writeFileSync(path.join(SOURCE_ROOT, 'docs', 'draft.tmp'), 'draft');
    fs.writeFileSync(path.join(SOURCE_ROOT, 'temp', 'scratch.tmp'), 'scratch');
  });

  // A `*.tmp` global exclusion should flag .tmp files (and only those)
  // as ignored in the browse listing after a scan.
  test('global exclusions mark matching files as ignored', async ({ page }) => {
    const requestContext = await setupRequestContext();
    await requestContext.post(`${API_URL}/system/settings`, {
      data: { key: 'source_roots', value: JSON.stringify([SOURCE_ROOT]) }
    });
    await requestContext.post(`${API_URL}/system/settings`, {
      data: { key: 'global_exclusions', value: '*.tmp\n' }
    });
    await triggerScanAndWait(requestContext);
    const browseResp = await requestContext.get(
      `${API_URL}/system/browse?path=${SOURCE_ROOT}`
    );
    const browseData = await browseResp.json();
    const files = (browseData as any).files;
    const tmpFiles = (files as Array<any>).filter((f: any) => f.name.endsWith('.tmp'));
    const txtFiles = (files as Array<any>).filter((f: any) => f.name.endsWith('.txt'));
    expect(tmpFiles.length).toBeGreaterThan(0);
    tmpFiles.forEach((f: any) => {
      expect(f.ignored, `expected ${f.name} to be ignored`).toBe(true);
    });
    txtFiles.forEach((f: any) => {
      expect(f.ignored, `expected ${f.name} to NOT be ignored`).toBe(false);
    });
    await requestContext.dispose();
  });

  // A manual track (include) rule on one file should win over the global
  // exclusion, while other matching files stay ignored.
  test('manual include overrides global exclusion', async ({ page }) => {
    const requestContext = await setupRequestContext();
    await requestContext.post(`${API_URL}/system/settings`, {
      data: { key: 'source_roots', value: JSON.stringify([SOURCE_ROOT]) }
    });
    await requestContext.post(`${API_URL}/system/settings`, {
      data: { key: 'global_exclusions', value: '*.tmp\n' }
    });
    // Include one specific .tmp file despite the global exclusion
    await requestContext.post(`${API_URL}/system/track/batch`, {
      data: {
        tracks: [path.join(SOURCE_ROOT, 'data.tmp')],
        untracks: []
      }
    });
    await triggerScanAndWait(requestContext);
    const browseResp = await requestContext.get(
      `${API_URL}/system/browse?path=${SOURCE_ROOT}`
    );
    const browseData = await browseResp.json();
    const files = (browseData as any).files;
    const dataTmp = (files as Array<any>).find((f: any) => f.name === 'data.tmp');
    const cacheTmp = (files as Array<any>).find((f: any) => f.name === 'cache.tmp');
    expect(dataTmp).toBeDefined();
    expect(dataTmp.ignored).toBe(false);
    expect(cacheTmp).toBeDefined();
    expect(cacheTmp.ignored).toBe(true);
    await requestContext.dispose();
  });

  // Changing global_exclusions after a scan must recompute the ignored
  // flags on already-indexed rows without requiring a fresh scan.
  test('updating global exclusions recomputes existing indexed files', async ({ page }) => {
    const requestContext = await setupRequestContext();
    await requestContext.post(`${API_URL}/system/settings`, {
      data: { key: 'source_roots', value: JSON.stringify([SOURCE_ROOT]) }
    });
    // No exclusions initially
    await requestContext.post(`${API_URL}/system/settings`, {
      data: { key: 'global_exclusions', value: '' }
    });
    await triggerScanAndWait(requestContext);
    // Verify nothing is ignored before exclusions are set
    const browseBefore = await requestContext.get(
      `${API_URL}/system/browse?path=${SOURCE_ROOT}`
    );
    const beforeData = await browseBefore.json();
    const beforeFiles = (beforeData as any).files as Array<any>;
    beforeFiles.forEach((f: any) => {
      expect(f.ignored, `expected ${f.name} to NOT be ignored before policy`).toBe(false);
    });
    // Now apply global exclusions — should recompute without requiring a new scan
    await requestContext.post(`${API_URL}/system/settings`, {
      data: { key: 'global_exclusions', value: '*.tmp\n' }
    });
    const browseAfter = await requestContext.get(
      `${API_URL}/system/browse?path=${SOURCE_ROOT}`
    );
    const afterData = await browseAfter.json();
    const afterFiles = (afterData as any).files as Array<any>;
    const tmpAfter = afterFiles.filter((f: any) => f.name.endsWith('.tmp'));
    const txtAfter = afterFiles.filter((f: any) => f.name.endsWith('.txt'));
    tmpAfter.forEach((f: any) => {
      expect(f.ignored, `expected ${f.name} to be ignored after policy update`).toBe(true);
    });
    txtAfter.forEach((f: any) => {
      expect(f.ignored, `expected ${f.name} to NOT be ignored after policy update`).toBe(false);
    });
    await requestContext.dispose();
  });

  // The preview endpoint reports counts/sizes and a bounded sample of
  // matching files without mutating any state.
  test('exclusion preview returns correct counts and sample', async ({ page }) => {
    const requestContext = await setupRequestContext();
    await requestContext.post(`${API_URL}/system/settings`, {
      data: { key: 'source_roots', value: JSON.stringify([SOURCE_ROOT]) }
    });
    await triggerScanAndWait(requestContext);
    const previewResp = await requestContext.post(`${API_URL}/system/settings/test-exclusions`, {
      data: { patterns: '*.tmp', limit: 10 }
    });
    expect(previewResp.ok()).toBe(true);
    const preview = await previewResp.json();
    expect(preview.total_files).toBeGreaterThan(0);
    expect(preview.matched_count).toBeGreaterThan(0);
    expect(preview.matched_size).toBeGreaterThanOrEqual(0);
    expect(Array.isArray(preview.sample)).toBe(true);
    expect(preview.sample.length).toBeGreaterThan(0);
    expect(preview.sample.length).toBeLessThanOrEqual(10);
    preview.sample.forEach((s: any) => {
      expect(s.name.endsWith('.tmp')).toBe(true);
      expect(s.path).toBeDefined();
      expect(s.size).toBeDefined();
    });
    await requestContext.dispose();
  });

  // An empty pattern set should yield a zeroed, empty preview result.
  test('exclusion preview with no patterns returns empty result', async ({ page }) => {
    const requestContext = await setupRequestContext();
    await requestContext.post(`${API_URL}/system/settings`, {
      data: { key: 'source_roots', value: JSON.stringify([SOURCE_ROOT]) }
    });
    await triggerScanAndWait(requestContext);
    const previewResp = await requestContext.post(`${API_URL}/system/settings/test-exclusions`, {
      data: { patterns: '', limit: 10 }
    });
    expect(previewResp.ok()).toBe(true);
    const preview = await previewResp.json();
    expect(preview.total_files).toBe(0);
    expect(preview.matched_count).toBe(0);
    expect(preview.matched_size).toBe(0);
    expect(preview.sample).toEqual([]);
    await requestContext.dispose();
  });

  // The download endpoint returns a CSV with the expected header and at
  // least one matched row.
  test('exclusion CSV download contains matched files', async ({ page }) => {
    const requestContext = await setupRequestContext();
    await requestContext.post(`${API_URL}/system/settings`, {
      data: { key: 'source_roots', value: JSON.stringify([SOURCE_ROOT]) }
    });
    await triggerScanAndWait(requestContext);
    const downloadResp = await requestContext.post(
      `${API_URL}/system/settings/test-exclusions/download`,
      { data: { patterns: '*.tmp' } }
    );
    expect(downloadResp.ok()).toBe(true);
    const contentType = downloadResp.headers()['content-type'];
    expect(contentType).toContain('text/csv');
    const body = await downloadResp.text();
    expect(body).toContain('path,size,mtime,sha256_hash');
    expect(body).toContain('.tmp');
    const lines = body.trim().split('\n');
    expect(lines.length).toBeGreaterThan(1); // header + at least one row
    await requestContext.dispose();
  });

  // A `temp/` directory pattern should ignore the directory itself and
  // everything nested inside it, while siblings are untouched.
  test('directory-level global exclusion ignores nested files', async ({ page }) => {
    const requestContext = await setupRequestContext();
    await requestContext.post(`${API_URL}/system/settings`, {
      data: { key: 'source_roots', value: JSON.stringify([SOURCE_ROOT]) }
    });
    await requestContext.post(`${API_URL}/system/settings`, {
      data: { key: 'global_exclusions', value: 'temp/\n' }
    });
    await triggerScanAndWait(requestContext);
    const browseRoot = await requestContext.get(
      `${API_URL}/system/browse?path=${SOURCE_ROOT}`
    );
    const rootData = await browseRoot.json();
    const rootFiles = (rootData as any).files as Array<any>;
    const tempDir = rootFiles.find((f: any) => f.name === 'temp');
    expect(tempDir).toBeDefined();
    expect(tempDir.ignored).toBe(true);
    // Files inside temp should also be ignored
    const browseTemp = await requestContext.get(
      `${API_URL}/system/browse?path=${path.join(SOURCE_ROOT, 'temp')}`
    );
    const tempData = await browseTemp.json();
    const tempFiles = (tempData as any).files as Array<any>;
    tempFiles.forEach((f: any) => {
      expect(f.ignored, `expected ${f.name} inside temp/ to be ignored`).toBe(true);
    });
    // Files outside temp should NOT be ignored
    const readme = rootFiles.find((f: any) => f.name === 'readme.txt');
    expect(readme).toBeDefined();
    expect(readme.ignored).toBe(false);
    await requestContext.dispose();
  });
});
+20
View File
@@ -64,12 +64,25 @@ db-migrate message:
# --- Code Generation --- # --- Code Generation ---
# Export the OpenAPI spec JSON without regenerating the TypeScript client
export-openapi:
@echo "Exporting OpenAPI spec..."
@cd backend && uv run python scripts/generate_openapi.py /tmp/tapehoard_openapi.json
# Generate the TypeScript API client from the FastAPI OpenAPI spec # Generate the TypeScript API client from the FastAPI OpenAPI spec
generate-client: db-upgrade generate-client: db-upgrade
@echo "Generating TypeScript API client..." @echo "Generating TypeScript API client..."
@cd backend && uv run python scripts/generate_openapi.py /tmp/tapehoard_openapi.json @cd backend && uv run python scripts/generate_openapi.py /tmp/tapehoard_openapi.json
@cd frontend && npx @hey-api/openapi-ts -i /tmp/tapehoard_openapi.json -o src/lib/api -c @hey-api/client-fetch @cd frontend && npx @hey-api/openapi-ts -i /tmp/tapehoard_openapi.json -o src/lib/api -c @hey-api/client-fetch
# Full regeneration workflow after schema changes: migrate, upgrade, generate client, lint
regenerate message: db-upgrade
@echo "Running full regeneration workflow..."
cd backend && uv run alembic revision --autogenerate -m "{{message}}"
cd backend && uv run alembic upgrade head
@just generate-client
@just lint
# --- Docker --- # --- Docker ---
# Build the production Docker image # Build the production Docker image
@@ -97,3 +110,10 @@ playwright:
playwright-ui: playwright-ui:
@echo "Starting playwright UI..." @echo "Starting playwright UI..."
cd frontend && npx playwright test --ui cd frontend && npx playwright test --ui
# Clean test artifacts and kill stale test servers
clean-test:
@echo "Cleaning test artifacts..."
pkill -f "start_test_server" 2>/dev/null || true
rm -f backend/e2e_test.db backend/e2e_test.db-*
rm -rf frontend/test-results/