mirror of
https://github.com/astral-sh/ruff.git
synced 2026-05-06 08:56:57 -04:00
More minor improvements to conformance.py (#23792)
This commit is contained in:
+64
-32
@@ -76,13 +76,12 @@ CONFORMANCE_URL = CONFORMANCE_DIR_WITH_README + "tests/{filename}#L{line}"
|
||||
|
||||
GITHUB_HEADER = [
|
||||
"<table>",
|
||||
"",
|
||||
"<tr>",
|
||||
"<th>Test case</th>",
|
||||
"<th>Diff</th>",
|
||||
"</tr>",
|
||||
]
|
||||
GITHUB_FOOTER = ["", "</table>"]
|
||||
GITHUB_FOOTER = ["</table>"]
|
||||
SUMMARY_NOTE = """
|
||||
Each test case represents one expected error annotation or a group of annotations
|
||||
sharing a tag. Counts are per test case, not per diagnostic — multiple diagnostics
|
||||
@@ -370,7 +369,6 @@ def render_html_diff_row(tc: TestCase, *, source: Source | None) -> list[str]:
|
||||
return [
|
||||
"",
|
||||
"<tr>",
|
||||
"",
|
||||
"<td>",
|
||||
"",
|
||||
location,
|
||||
@@ -384,7 +382,6 @@ def render_html_diff_row(tc: TestCase, *, source: Source | None) -> list[str]:
|
||||
"```",
|
||||
"",
|
||||
"</td>",
|
||||
"",
|
||||
"</tr>",
|
||||
]
|
||||
|
||||
@@ -719,16 +716,28 @@ def render_test_cases(
|
||||
lines.append("```")
|
||||
else:
|
||||
lines.extend(GITHUB_FOOTER)
|
||||
lines.extend(["", "</details>", ""])
|
||||
lines.extend(["</details>", ""])
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def render_file_stats_table(test_cases: list[TestCase]) -> str:
|
||||
"""Render a per-file breakdown showing only files whose TP/FP/FN counts changed."""
|
||||
def collect_file_stats(test_cases: list[TestCase]) -> list[FileStats]:
    """Compute per-file statistics from grouped test cases.

    Test cases are bucketed by their ``path`` attribute. Each bucket yields
    one ``FileStats`` entry holding the ``compute_stats`` result for
    ``Source.OLD`` and for ``Source.NEW``.

    The returned list follows the order in which each path first appears in
    ``test_cases`` (dicts preserve insertion order).
    """
    # Group the test cases by the file they belong to.
    path_to_cases: dict[Path, list[TestCase]] = {}
    for tc in test_cases:
        path_to_cases.setdefault(tc.path, []).append(tc)

    # One FileStats per file, with old and new statistics computed
    # over the same group of cases.
    return [
        FileStats(
            path=path,
            old=compute_stats(cases, Source.OLD),
            new=compute_stats(cases, Source.NEW),
        )
        for path, cases in path_to_cases.items()
    ]
|
||||
|
||||
|
||||
def render_file_stats_table(file_stats: list[FileStats]) -> str:
|
||||
"""Render a per-file breakdown showing only files whose TP/FP/FN counts changed."""
|
||||
|
||||
def fmt(old: int, new: int, *, greater_is_better: bool = True) -> str:
|
||||
if old == new:
|
||||
@@ -738,19 +747,13 @@ def render_file_stats_table(test_cases: list[TestCase]) -> str:
|
||||
indicator = " ✅" if improved else " ❌"
|
||||
return f"{new} ({diff:+}){indicator}"
|
||||
|
||||
# Collect per-file data; track totals across all files regardless of change.
|
||||
file_stats: list[FileStats] = []
|
||||
# Collect totals across all files regardless of change.
|
||||
old_totals = Statistics()
|
||||
new_totals = Statistics()
|
||||
passing = 0
|
||||
total_files = 0
|
||||
total_files = len(file_stats)
|
||||
|
||||
for path, cases in path_to_cases.items():
|
||||
fs = FileStats(
|
||||
path=path,
|
||||
old=compute_stats(cases, Source.OLD),
|
||||
new=compute_stats(cases, Source.NEW),
|
||||
)
|
||||
for fs in file_stats:
|
||||
old_totals.true_positives += fs.old.true_positives
|
||||
old_totals.false_positives += fs.old.false_positives
|
||||
old_totals.false_negatives += fs.old.false_negatives
|
||||
@@ -758,8 +761,6 @@ def render_file_stats_table(test_cases: list[TestCase]) -> str:
|
||||
new_totals.false_positives += fs.new.false_positives
|
||||
new_totals.false_negatives += fs.new.false_negatives
|
||||
passing += fs.new_passes
|
||||
total_files += 1
|
||||
file_stats.append(fs)
|
||||
|
||||
changed = [fs for fs in file_stats if fs.total_change > 0]
|
||||
if not changed:
|
||||
@@ -772,9 +773,9 @@ def render_file_stats_table(test_cases: list[TestCase]) -> str:
|
||||
if fs.new_passes and not fs.old_passes:
|
||||
status = "✅ Newly Passing 🎉"
|
||||
elif fs.old_passes and not fs.new_passes:
|
||||
status = "❌ Newly Failing"
|
||||
status = "❌ Newly Failing ☹️"
|
||||
elif fs.new_passes:
|
||||
status = "✅"
|
||||
status = "✅ Still Passing"
|
||||
else:
|
||||
old_errors = fs.old.false_positives + fs.old.false_negatives
|
||||
new_errors = fs.new.false_positives + fs.new.false_negatives
|
||||
@@ -849,17 +850,34 @@ def diff_format(
|
||||
assert_never((greater_is_better, increased)) # ty: ignore[type-assertion-failure]
|
||||
|
||||
|
||||
def render_summary(test_cases: list[TestCase], *, force_summary_table: bool) -> str:
|
||||
def format_metric(diff: float, old: float, new: float):
|
||||
def render_summary(
|
||||
test_cases: list[TestCase],
|
||||
file_stats: list[FileStats],
|
||||
*,
|
||||
force_summary_table: bool,
|
||||
) -> str:
|
||||
def format_metric(diff: float, old: float, new: float) -> str:
    """Describe how a ratio metric moved, as an HTML-flavoured phrase.

    ``old`` and ``new`` are rendered as percentages with two decimals and
    wrapped in ``<b>`` tags. The sign of ``diff`` selects the wording:
    positive -> "increased", negative -> "decreased", zero -> "held steady"
    (in which case only ``old`` is shown).
    """
    if diff > 0:
        return f"increased from <b>{old:.2%}</b> to <b>{new:.2%}</b>"
    if diff < 0:
        return f"decreased from <b>{old:.2%}</b> to <b>{new:.2%}</b>"
    return f"held steady at <b>{old:.2%}</b>"
|
||||
|
||||
def format_int_metric(diff: int, old: int, new: int, total: int) -> str:
    """Describe how an integer count out of ``total`` moved.

    ``old`` and ``new`` are rendered as ``<b>count/total</b>``. The sign of
    ``diff`` selects the wording: positive -> "improved", negative ->
    "regressed", zero -> "held steady" (in which case only ``old`` is shown).
    """
    if diff > 0:
        return f"improved from <b>{old}/{total}</b> to <b>{new}/{total}</b>"
    if diff < 0:
        return f"regressed from <b>{old}/{total}</b> to <b>{new}/{total}</b>"
    return f"held steady at <b>{old}/{total}</b>"
|
||||
|
||||
old = compute_stats(test_cases, Source.OLD)
|
||||
new = compute_stats(test_cases, Source.NEW)
|
||||
|
||||
old_files_passing = sum(fs.old_passes for fs in file_stats)
|
||||
new_files_passing = sum(fs.new_passes for fs in file_stats)
|
||||
total_files = len(file_stats)
|
||||
files_passing_change = new_files_passing - old_files_passing
|
||||
|
||||
assert new.true_positives > 0, (
|
||||
"Expected ty to have at least one true positive.\n"
|
||||
f"Sample of grouped diagnostics: {test_cases[:5]}"
|
||||
@@ -876,7 +894,9 @@ def render_summary(test_cases: list[TestCase], *, force_summary_table: bool) ->
|
||||
f"The percentage of diagnostics emitted that were expected errors "
|
||||
f"{format_metric(precision_change, old.precision, new.precision)}. "
|
||||
f"The percentage of expected errors that received a diagnostic "
|
||||
f"{format_metric(recall_change, old.recall, new.recall)}."
|
||||
f"{format_metric(recall_change, old.recall, new.recall)}. "
|
||||
f"The number of fully passing files "
|
||||
f"{format_int_metric(files_passing_change, old_files_passing, new_files_passing, total_files)}."
|
||||
)
|
||||
|
||||
base_header = f"[Typing conformance results]({CONFORMANCE_DIR_WITH_README})"
|
||||
@@ -906,13 +926,18 @@ def render_summary(test_cases: list[TestCase], *, force_summary_table: bool) ->
|
||||
precision_diff = diff_format(precision_change, greater_is_better=True)
|
||||
recall_diff = diff_format(recall_change, greater_is_better=True)
|
||||
total_diff = diff_format(total_change, neutral=True)
|
||||
passing_diff = diff_format(files_passing_change, greater_is_better=True)
|
||||
|
||||
if (precision_change > 0 and recall_change >= 0) or (
|
||||
recall_change > 0 and precision_change >= 0
|
||||
if (
|
||||
(precision_change > 0 and recall_change >= 0 and files_passing_change >= 0)
|
||||
or (recall_change > 0 and precision_change >= 0 and files_passing_change >= 0)
|
||||
or (files_passing_change > 0 and precision_change >= 0 and recall_change >= 0)
|
||||
):
|
||||
header = f"{base_header} improved 🎉"
|
||||
elif (precision_change < 0 and recall_change <= 0) or (
|
||||
recall_change < 0 and precision_change <= 0
|
||||
elif (
|
||||
(precision_change < 0 and recall_change <= 0 and files_passing_change <= 0)
|
||||
or (recall_change < 0 and precision_change <= 0 and files_passing_change <= 0)
|
||||
or (files_passing_change < 0 and precision_change <= 0 and recall_change <= 0)
|
||||
):
|
||||
header = f"{base_header} regressed ❌"
|
||||
else:
|
||||
@@ -945,6 +970,7 @@ def render_summary(test_cases: list[TestCase], *, force_summary_table: bool) ->
|
||||
| Total Diagnostics | {old.total_diagnostics} | {new.total_diagnostics} | {total_change:+} | {total_diff} |
|
||||
| Precision | {old.precision:.2%} | {new.precision:.2%} | {precision_change:+.2%} | {precision_diff} |
|
||||
| Recall | {old.recall:.2%} | {new.recall:.2%} | {recall_change:+.2%} | {recall_diff} |
|
||||
| Passing Files | {old_files_passing}/{total_files} | {new_files_passing}/{total_files} | {files_passing_change:+} | {passing_diff} |
|
||||
|
||||
"""
|
||||
)
|
||||
@@ -1060,12 +1086,18 @@ def main():
|
||||
expected=expected,
|
||||
)
|
||||
|
||||
file_stats = collect_file_stats(grouped)
|
||||
|
||||
rendered = "\n\n".join(
|
||||
filter(
|
||||
None,
|
||||
[
|
||||
render_summary(grouped, force_summary_table=args.force_summary_table),
|
||||
render_file_stats_table(grouped),
|
||||
render_summary(
|
||||
grouped,
|
||||
file_stats,
|
||||
force_summary_table=args.force_summary_table,
|
||||
),
|
||||
render_file_stats_table(file_stats),
|
||||
render_test_cases(grouped, format=args.format),
|
||||
],
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user