From a06821968407dc18c0117af049a1c492525109fd Mon Sep 17 00:00:00 2001 From: Vinta Chen Date: Sun, 3 May 2026 12:08:15 +0800 Subject: [PATCH] fix(website): type build template entries --- website/build.py | 118 +++++++++++++++++++++--------------- website/tests/test_build.py | 95 ++++++++++++----------------- 2 files changed, 110 insertions(+), 103 deletions(-) diff --git a/website/build.py b/website/build.py index 4080c516..1e5585c9 100644 --- a/website/build.py +++ b/website/build.py @@ -9,10 +9,10 @@ from collections import Counter from collections.abc import Sequence from datetime import UTC, datetime from pathlib import Path -from typing import Any +from typing import TypedDict from jinja2 import Environment, FileSystemLoader -from readme_parser import ParsedGroup, ParsedSection, parse_readme, parse_sponsors, slugify +from readme_parser import AlsoSee, ParsedGroup, ParsedSection, parse_readme, parse_sponsors, slugify GITHUB_REPO_URL_RE = re.compile(r"^https?://github\.com/([^/]+/[^/]+?)(?:\.git)?/?$") MARKDOWN_LINK_RE = re.compile(r"\[[^\]]+\]\(([^)\s]+)\)") @@ -36,6 +36,37 @@ SOURCE_TYPE_DOMAINS = { } +class TemplateSubcategory(TypedDict): + name: str + value: str + slug: str + url: str + + +class TemplateEntry(TypedDict): + name: str + url: str + description: str + categories: list[str] + groups: list[str] + subcategories: list[TemplateSubcategory] + stars: int | None + owner: str | None + last_commit_at: str | None + source_type: str | None + also_see: list[AlsoSee] + + +class SyntheticCategory(TypedDict): + name: str + slug: str + description: str + description_html: str + + +TemplateCategory = ParsedSection | SyntheticCategory + + def detect_source_type(url: str) -> str | None: """Detect source type from URL domain. Returns None for GitHub URLs.""" if GITHUB_REPO_URL_RE.match(url): @@ -64,13 +95,13 @@ def load_stars(path: Path) -> dict[str, dict]: return {} -def sort_entries(entries: list[dict]) -> list[dict]: +def sort_entries(entries: Sequence[TemplateEntry]) -> list[TemplateEntry]: """Sort entries by stars descending, then name ascending. Three tiers: starred entries first, stdlib second, other non-starred last. """ - def sort_key(entry: dict) -> tuple[int, int, int, str]: + def sort_key(entry: TemplateEntry) -> tuple[int, int, int, str]: stars = entry["stars"] name = entry["name"].lower() if stars is not None: @@ -84,13 +115,7 @@ def sort_entries(entries: list[dict]) -> list[dict]: def build_robots_txt() -> str: - return ( - "User-agent: *\n" - "Content-Signal: search=yes, ai-input=yes, ai-train=yes\n" - "Allow: /\n" - "\n" - f"Sitemap: {SITEMAP_URL}\n" - ) + return f"User-agent: *\nContent-Signal: search=yes, ai-input=yes, ai-train=yes\nAllow: /\n\nSitemap: {SITEMAP_URL}\n" def category_path(category: ParsedSection) -> str: @@ -117,7 +142,7 @@ def subcategory_public_url(category_slug: str, subcategory_slug: str) -> str: return f"{SITE_URL}categories/{category_slug}/{subcategory_slug}/" -def synthetic_category(name: str, slug: str) -> dict[str, str]: +def synthetic_category(name: str, slug: str) -> SyntheticCategory: return {"name": name, "slug": slug, "description": "", "description_html": ""} @@ -202,7 +227,7 @@ def annotate_entries_with_stars( if not entry or "stars" not in entry: continue stripped = line.rstrip("\n") - ending = line[len(stripped):] + ending = line[len(stripped) :] annotated = f"{stripped} ({format_stars(entry['stars'])}){ending}" break out.append(annotated) @@ -233,7 +258,7 @@ def remove_sponsors_section(markdown: str) -> str: def extract_entries( categories: list[ParsedSection], groups: list[ParsedGroup], -) -> list[dict]: +) -> list[TemplateEntry]: """Flatten categories into individual library entries for table display. Entries appearing in multiple categories are merged into a single entry @@ -241,27 +266,27 @@ def extract_entries( """ cat_to_group = {cat["name"]: group["name"] for group in groups for cat in group["categories"]} - seen: dict[tuple[str, str], dict[str, Any]] = {} # (url, name) -> entry - entries: list[dict[str, Any]] = [] + seen: dict[tuple[str, str], TemplateEntry] = {} # (url, name) -> entry + entries: list[TemplateEntry] = [] for cat in categories: group_name = cat_to_group.get(cat["name"], "Other") for entry in cat["entries"]: key = (entry["url"], entry["name"]) - existing: dict[str, Any] | None = seen.get(key) + existing = seen.get(key) if existing is None: - existing = { - "name": entry["name"], - "url": entry["url"], - "description": entry["description"], - "categories": [], - "groups": [], - "subcategories": [], - "stars": None, - "owner": None, - "last_commit_at": None, - "source_type": detect_source_type(entry["url"]), - "also_see": entry["also_see"], - } + existing = TemplateEntry( + name=entry["name"], + url=entry["url"], + description=entry["description"], + categories=[], + groups=[], + subcategories=[], + stars=None, + owner=None, + last_commit_at=None, + source_type=detect_source_type(entry["url"]), + also_see=entry["also_see"], + ) seen[key] = existing entries.append(existing) if cat["name"] not in existing["categories"]: @@ -273,12 +298,14 @@ def extract_entries( scoped = f"{cat['name']} > {subcat}" if not any(s["value"] == scoped for s in existing["subcategories"]): sub_slug = slugify(subcat) - existing["subcategories"].append({ - "name": subcat, - "value": scoped, - "slug": sub_slug, - "url": f"/categories/{cat['slug']}/{sub_slug}/", - }) + existing["subcategories"].append( + TemplateSubcategory( + name=subcat, + value=scoped, + slug=sub_slug, + url=f"/categories/{cat['slug']}/{sub_slug}/", + ) + ) return entries @@ -303,10 +330,7 @@ def build(repo_root: Path) -> None: all_top_level_slugs = cat_slugs + group_slugs + [BUILTIN_SLUG] duplicates = {s for s, n in Counter(all_top_level_slugs).items() if n > 1} if duplicates: - raise ValueError( - f"slug collision in /categories/ namespace: {sorted(duplicates)}. " - "Rename a category or group so their slugs differ." - ) + raise ValueError(f"slug collision in /categories/ namespace: {sorted(duplicates)}. Rename a category or group so their slugs differ.") total_entries = sum(c["entry_count"] for c in categories) entries = extract_entries(categories, parsed_groups) build_date = datetime.now(UTC) @@ -377,14 +401,14 @@ def build(repo_root: Path) -> None: categories_dir = site_dir / "categories" def render_category( - category: dict, + category: TemplateCategory, *, category_url: str, - entries: list[dict], + entries: Sequence[TemplateEntry], current_path: str, page_dir: Path, - parent_category: dict | None = None, - group_categories: list | None = None, + parent_category: ParsedSection | None = None, + group_categories: Sequence[ParsedSection] | None = None, ) -> None: page_dir.mkdir(parents=True, exist_ok=True) (page_dir / "index.html").write_text( @@ -443,7 +467,7 @@ def build(repo_root: Path) -> None: encoding="utf-8", ) - subcat_to_entries: dict[str, list[dict]] = {} + subcat_to_entries: dict[str, list[TemplateEntry]] = {} subcat_meta: dict[str, tuple[str, str, str]] = {} # value -> (cat_slug, sub_slug, sub_name) cat_slug_by_url_prefix = {f"/categories/{c['slug']}/": c["slug"] for c in categories} cat_by_slug = {c["slug"]: c for c in categories} @@ -472,9 +496,7 @@ def build(repo_root: Path) -> None: if static_src.exists(): shutil.copytree(static_src, static_dst, dirs_exist_ok=True) - markdown_index = annotate_entries_with_stars( - remove_sponsors_section(readme_text), stars_data - ) + markdown_index = annotate_entries_with_stars(remove_sponsors_section(readme_text), stars_data) llms_template = (website / "templates" / "llms.txt").read_text(encoding="utf-8") llms_txt = build_llms_txt(llms_template, readme_text, stars_data) (site_dir / "robots.txt").write_text(build_robots_txt(), encoding="utf-8") diff --git a/website/tests/test_build.py b/website/tests/test_build.py index 2072e350..887c0248 100644 --- a/website/tests/test_build.py +++ b/website/tests/test_build.py @@ -9,8 +9,8 @@ from html.parser import HTMLParser from pathlib import Path import pytest - from build import ( + TemplateEntry, annotate_entries_with_stars, build, detect_source_type, @@ -121,28 +121,15 @@ class TestBuild: encoding="utf-8", ) (tpl_dir / "category.html").write_text( - '{% extends "base.html" %}{% block content %}' - "

{{ category.name }}

" - "{% for entry in entries %}" - '{{ entry.name }}' - "{% endfor %}" - "{% endblock %}", + '{% extends "base.html" %}{% block content %}

{{ category.name }}

{% for entry in entries %}{{ entry.name }}{% endfor %}{% endblock %}', encoding="utf-8", ) (tpl_dir / "sponsorship.html").write_text( - '{% extends "base.html" %}{% block content %}' - "

Sponsor

" - "{% endblock %}", + '{% extends "base.html" %}{% block content %}

Sponsor

{% endblock %}', encoding="utf-8", ) (tpl_dir / "llms.txt").write_text( - "# Awesome Python\n" - "\n" - "Use this list to find Python tools.\n" - "\n" - "# Categories\n" - "\n" - "{{ categories_md }}\n", + "# Awesome Python\n\nUse this list to find Python tools.\n\n# Categories\n\n{{ categories_md }}\n", encoding="utf-8", ) @@ -220,19 +207,13 @@ class TestBuild: site = tmp_path / "website" / "output" robots = (site / "robots.txt").read_text(encoding="utf-8") - assert robots == ( - "User-agent: *\n" - "Content-Signal: search=yes, ai-input=yes, ai-train=yes\n" - "Allow: /\n" - "\n" - "Sitemap: https://awesome-python.com/sitemap.xml\n" - ) + assert robots == ("User-agent: *\nContent-Signal: search=yes, ai-input=yes, ai-train=yes\nAllow: /\n\nSitemap: https://awesome-python.com/sitemap.xml\n") sitemap = ET.parse(site / "sitemap.xml") root = sitemap.getroot() ns = {"sitemap": "http://www.sitemaps.org/schemas/sitemap/0.9"} - locs = [loc.text for loc in root.findall("sitemap:url/sitemap:loc", ns)] - lastmods = [lastmod.text for lastmod in root.findall("sitemap:url/sitemap:lastmod", ns)] + locs = [loc.text or "" for loc in root.findall("sitemap:url/sitemap:loc", ns)] + lastmods = [lastmod.text or "" for lastmod in root.findall("sitemap:url/sitemap:lastmod", ns)] assert root.tag == "{http://www.sitemaps.org/schemas/sitemap/0.9}urlset" assert locs == [ @@ -820,45 +801,61 @@ class TestLoadStars: # --------------------------------------------------------------------------- +def _template_entry(name: str, stars: int | None, source_type: str | None = None) -> TemplateEntry: + return TemplateEntry( + name=name, + url="", + description="", + categories=[], + groups=[], + subcategories=[], + stars=stars, + owner=None, + last_commit_at=None, + source_type=source_type, + also_see=[], + ) + + class TestSortEntries: def test_sorts_by_stars_descending(self): entries = [ - {"name": "a", "stars": 100, "url": ""}, - {"name": "b", "stars": 500, "url": ""}, - {"name": "c", "stars": 200, "url": ""}, + _template_entry("a", 100), + _template_entry("b", 500), + _template_entry("c", 200), ] result = sort_entries(entries) assert [e["name"] for e in result] == ["b", "c", "a"] def test_equal_stars_sorted_alphabetically(self): entries = [ - {"name": "beta", "stars": 100, "url": ""}, - {"name": "alpha", "stars": 100, "url": ""}, + _template_entry("beta", 100), + _template_entry("alpha", 100), ] result = sort_entries(entries) assert [e["name"] for e in result] == ["alpha", "beta"] def test_no_stars_go_to_bottom(self): entries = [ - {"name": "no-stars", "stars": None, "url": ""}, - {"name": "has-stars", "stars": 50, "url": ""}, + _template_entry("no-stars", None), + _template_entry("has-stars", 50), ] result = sort_entries(entries) assert [e["name"] for e in result] == ["has-stars", "no-stars"] def test_no_stars_sorted_alphabetically(self): entries = [ - {"name": "zebra", "stars": None, "url": ""}, - {"name": "apple", "stars": None, "url": ""}, + _template_entry("zebra", None), + _template_entry("apple", None), ] result = sort_entries(entries) assert [e["name"] for e in result] == ["apple", "zebra"] def test_builtin_between_starred_and_unstarred(self): entries = [ - {"name": "builtin", "stars": None, "source_type": "Built-in"}, - {"name": "starred", "stars": 100, "source_type": None}, - {"name": "unstarred", "stars": None, "source_type": None}, + _template_entry("builtin", None, "Built-in"), + _template_entry("starred", 100), + _template_entry("unstarred", None), ] result = sort_entries(entries) assert [e["name"] for e in result] == ["starred", "builtin", "unstarred"] @@ -1005,23 +1002,15 @@ class TestAnnotateEntriesWithStars: def test_appends_star_count_to_bullet(self): markdown = "- [foo](https://github.com/owner/foo) - A foo.\n" stars = {"owner/foo": {"stars": 123, "owner": "owner"}} - assert annotate_entries_with_stars(markdown, stars) == ( - "- [foo](https://github.com/owner/foo) - A foo. (123 GitHub stars)\n" - ) + assert annotate_entries_with_stars(markdown, stars) == ("- [foo](https://github.com/owner/foo) - A foo. (123 GitHub stars)\n") def test_uses_first_github_link(self): - markdown = ( - "- [foo](https://github.com/owner/foo) - A foo. " - "Also [bar](https://github.com/owner/bar).\n" - ) + markdown = "- [foo](https://github.com/owner/foo) - A foo. Also [bar](https://github.com/owner/bar).\n" stars = { "owner/foo": {"stars": 10, "owner": "owner"}, "owner/bar": {"stars": 99, "owner": "owner"}, } - assert annotate_entries_with_stars(markdown, stars) == ( - "- [foo](https://github.com/owner/foo) - A foo. " - "Also [bar](https://github.com/owner/bar). (10 GitHub stars)\n" - ) + assert annotate_entries_with_stars(markdown, stars) == ("- [foo](https://github.com/owner/foo) - A foo. Also [bar](https://github.com/owner/bar). (10 GitHub stars)\n") def test_skips_entries_without_star_data(self): markdown = "- [foo](https://github.com/owner/foo) - A foo.\n" @@ -1040,13 +1029,9 @@ class TestAnnotateEntriesWithStars: def test_handles_indented_bullets(self): markdown = " - [foo](https://github.com/owner/foo)\n" stars = {"owner/foo": {"stars": 7, "owner": "owner"}} - assert annotate_entries_with_stars(markdown, stars) == ( - " - [foo](https://github.com/owner/foo) (7 GitHub stars)\n" - ) + assert annotate_entries_with_stars(markdown, stars) == (" - [foo](https://github.com/owner/foo) (7 GitHub stars)\n") def test_preserves_lines_without_trailing_newline(self): markdown = "- [foo](https://github.com/owner/foo) - A foo." stars = {"owner/foo": {"stars": 5, "owner": "owner"}} - assert annotate_entries_with_stars(markdown, stars) == ( - "- [foo](https://github.com/owner/foo) - A foo. (5 GitHub stars)" - ) + assert annotate_entries_with_stars(markdown, stars) == ("- [foo](https://github.com/owner/foo) - A foo. (5 GitHub stars)")