Add functionality to accurately show when tests were previously skipped and are now failing

Christopher Dryden
2025-11-29 15:47:21 +00:00
parent b2d1117e4e
commit a917791cc8
2 changed files with 80 additions and 16 deletions
+36 -5
@@ -50,14 +50,14 @@ def identify_test_changes(current_flat, reference_flat):
         reference_flat (dict): Flattened dictionary of reference test results
 
     Returns:
-        tuple: Four lists containing regressions, fixes, newly_skipped, and newly_passing tests
+        tuple: Five lists containing regressions, fixes, newly_skipped, newly_passing, and newly_failing tests
     """
     # Find regressions (tests that were passing but now failing)
     regressions = []
     for test_path, status in current_flat.items():
         if status in ("FAIL", "ERROR"):
             if test_path in reference_flat:
-                if reference_flat[test_path] in ("PASS", "SKIP"):
+                if reference_flat[test_path] == "PASS":
                     regressions.append(test_path)
 
     # Find fixes (tests that were failing but now passing)
@@ -88,7 +88,17 @@ def identify_test_changes(current_flat, reference_flat):
         ):
             newly_passing.append(test_path)
 
-    return regressions, fixes, newly_skipped, newly_passing
+    # Find newly failing tests (were skipped, now failing)
+    newly_failing = []
+    for test_path, status in current_flat.items():
+        if (
+            status in ("FAIL", "ERROR")
+            and test_path in reference_flat
+            and reference_flat[test_path] == "SKIP"
+        ):
+            newly_failing.append(test_path)
+
+    return regressions, fixes, newly_skipped, newly_passing, newly_failing
 
 
 def main():
@@ -135,8 +145,8 @@ def main():
     reference_flat = flatten_test_results(reference_results)
 
     # Identify different categories of test changes
-    regressions, fixes, newly_skipped, newly_passing = identify_test_changes(
-        current_flat, reference_flat
+    regressions, fixes, newly_skipped, newly_passing, newly_failing = (
+        identify_test_changes(current_flat, reference_flat)
     )
 
     # Filter out intermittent issues from regressions
@@ -147,6 +157,10 @@ def main():
     real_fixes = [f for f in fixes if f not in ignore_list]
    intermittent_fixes = [f for f in fixes if f in ignore_list]
 
+    # Filter out intermittent issues from newly failing
+    real_newly_failing = [n for n in newly_failing if n not in ignore_list]
+    intermittent_newly_failing = [n for n in newly_failing if n in ignore_list]
+
     # Print summary stats
     print(f"Total tests in current run: {len(current_flat)}")
     print(f"Total tests in reference: {len(reference_flat)}")
@@ -156,6 +170,8 @@ def main():
     print(f"Intermittent fixes: {len(intermittent_fixes)}")
     print(f"Newly skipped tests: {len(newly_skipped)}")
     print(f"Newly passing tests (previously skipped): {len(newly_passing)}")
+    print(f"Newly failing tests (previously skipped): {len(real_newly_failing)}")
+    print(f"Intermittent newly failing: {len(intermittent_newly_failing)}")
 
     output_lines = []
 
@@ -206,6 +222,21 @@ def main():
             print(f"::notice ::{msg}", file=sys.stderr)
             output_lines.append(msg)
 
+    # Report newly failing tests (were skipped, now failing)
+    if real_newly_failing:
+        print("\nNEWLY FAILING TESTS (previously skipped):", file=sys.stderr)
+        for test in sorted(real_newly_failing):
+            msg = f"Note: The gnu test {test} was skipped on 'main' but is now failing."
+            print(f"::warning ::{msg}", file=sys.stderr)
+            output_lines.append(msg)
+
+    if intermittent_newly_failing:
+        print("\nINTERMITTENT NEWLY FAILING (ignored):", file=sys.stderr)
+        for test in sorted(intermittent_newly_failing):
+            msg = f"Skip an intermittent issue {test} (was skipped on 'main', now failing)"
+            print(f"::notice ::{msg}", file=sys.stderr)
+            output_lines.append(msg)
+
     if args.output and output_lines:
         with open(args.output, "w") as f:
             for line in output_lines:
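Taken together, the hunks above change the classification rule: a test that currently reports FAIL or ERROR only counts as a regression when the reference run PASSed it, while a reference SKIP now lands in its own newly_failing bucket. A minimal standalone sketch of that split (the helper name split_failures and the sample data are illustrative, not part of this commit):

# Minimal sketch of the classification the hunks above implement.
# split_failures is an illustrative helper, not from the commit;
# inputs are flat {test_path: status} dicts as produced by
# flatten_test_results.
def split_failures(current_flat, reference_flat):
    regressions, newly_failing = [], []
    for test_path, status in current_flat.items():
        if status in ("FAIL", "ERROR") and test_path in reference_flat:
            if reference_flat[test_path] == "PASS":
                regressions.append(test_path)  # PASS -> FAIL/ERROR
            elif reference_flat[test_path] == "SKIP":
                newly_failing.append(test_path)  # SKIP -> FAIL/ERROR
    return regressions, newly_failing


current = {"tests/ls/a": "FAIL", "tests/ls/b": "ERROR"}
reference = {"tests/ls/a": "PASS", "tests/ls/b": "SKIP"}
print(split_failures(current, reference))
# (['tests/ls/a'], ['tests/ls/b'])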
+44 -11
@@ -129,11 +129,11 @@ class TestIdentifyTestChanges(unittest.TestCase):
         }
         reference = {
             "tests/ls/test1": "PASS",
-            "tests/ls/test2": "SKIP",
+            "tests/ls/test2": "PASS",
             "tests/cp/test3": "PASS",
             "tests/cp/test4": "FAIL",
         }
-        regressions, _, _, _ = identify_test_changes(current, reference)
+        regressions, _, _, _, _ = identify_test_changes(current, reference)
         self.assertEqual(sorted(regressions), ["tests/ls/test1", "tests/ls/test2"])
 
     def test_fixes(self):
@@ -150,7 +150,7 @@ class TestIdentifyTestChanges(unittest.TestCase):
             "tests/cp/test3": "PASS",
             "tests/cp/test4": "FAIL",
         }
-        _, fixes, _, _ = identify_test_changes(current, reference)
+        _, fixes, _, _, _ = identify_test_changes(current, reference)
         self.assertEqual(sorted(fixes), ["tests/ls/test1", "tests/ls/test2"])
 
     def test_newly_skipped(self):
@@ -165,7 +165,7 @@ class TestIdentifyTestChanges(unittest.TestCase):
             "tests/ls/test2": "FAIL",
             "tests/cp/test3": "PASS",
         }
-        _, _, newly_skipped, _ = identify_test_changes(current, reference)
+        _, _, newly_skipped, _, _ = identify_test_changes(current, reference)
         self.assertEqual(newly_skipped, ["tests/ls/test1"])
 
     def test_newly_passing(self):
@@ -180,7 +180,7 @@ class TestIdentifyTestChanges(unittest.TestCase):
             "tests/ls/test2": "FAIL",
             "tests/cp/test3": "SKIP",
         }
-        _, _, _, newly_passing = identify_test_changes(current, reference)
+        _, _, _, newly_passing, _ = identify_test_changes(current, reference)
         self.assertEqual(newly_passing, ["tests/ls/test1"])
 
     def test_all_categories(self):
@@ -191,6 +191,7 @@ class TestIdentifyTestChanges(unittest.TestCase):
             "tests/cp/test3": "SKIP",  # Newly skipped
             "tests/cp/test4": "PASS",  # Newly passing
             "tests/rm/test5": "PASS",  # No change
+            "tests/rm/test6": "FAIL",  # Newly failing
         }
         reference = {
             "tests/ls/test1": "PASS",  # Regression
@@ -198,14 +199,16 @@ class TestIdentifyTestChanges(unittest.TestCase):
             "tests/cp/test3": "PASS",  # Newly skipped
             "tests/cp/test4": "SKIP",  # Newly passing
             "tests/rm/test5": "PASS",  # No change
+            "tests/rm/test6": "SKIP",  # Newly failing
         }
-        regressions, fixes, newly_skipped, newly_passing = identify_test_changes(
-            current, reference
+        regressions, fixes, newly_skipped, newly_passing, newly_failing = (
+            identify_test_changes(current, reference)
         )
         self.assertEqual(regressions, ["tests/ls/test1"])
         self.assertEqual(fixes, ["tests/ls/test2"])
         self.assertEqual(newly_skipped, ["tests/cp/test3"])
         self.assertEqual(newly_passing, ["tests/cp/test4"])
+        self.assertEqual(newly_failing, ["tests/rm/test6"])
 
     def test_new_and_removed_tests(self):
         """Test handling of tests that are only in one of the datasets."""
@@ -219,13 +222,43 @@ class TestIdentifyTestChanges(unittest.TestCase):
             "tests/ls/test2": "PASS",
             "tests/rm/old_test": "FAIL",
         }
-        regressions, fixes, newly_skipped, newly_passing = identify_test_changes(
-            current, reference
+        regressions, fixes, newly_skipped, newly_passing, newly_failing = (
+            identify_test_changes(current, reference)
        )
         self.assertEqual(regressions, ["tests/ls/test2"])
         self.assertEqual(fixes, [])
         self.assertEqual(newly_skipped, [])
         self.assertEqual(newly_passing, [])
+        self.assertEqual(newly_failing, [])
+
+    def test_newly_failing(self):
+        """Test identifying newly failing tests (SKIP -> FAIL)."""
+        current = {
+            "tests/ls/test1": "FAIL",
+            "tests/ls/test2": "ERROR",
+            "tests/cp/test3": "PASS",
+        }
+        reference = {
+            "tests/ls/test1": "SKIP",
+            "tests/ls/test2": "SKIP",
+            "tests/cp/test3": "SKIP",
+        }
+
+        _, _, _, _, newly_failing = identify_test_changes(current, reference)
+        self.assertEqual(sorted(newly_failing), ["tests/ls/test1", "tests/ls/test2"])
+
+    def test_skip_to_fail_not_regression(self):
+        """Test that SKIP -> FAIL is not counted as a regression."""
+        current = {
+            "tests/ls/test1": "FAIL",
+            "tests/ls/test2": "FAIL",
+        }
+        reference = {
+            "tests/ls/test1": "SKIP",
+            "tests/ls/test2": "PASS",
+        }
+
+        regressions, _, _, _, newly_failing = identify_test_changes(current, reference)
+        self.assertEqual(regressions, ["tests/ls/test2"])
+        self.assertEqual(newly_failing, ["tests/ls/test1"])
 
 
 class TestMainFunction(unittest.TestCase):
@@ -285,7 +318,7 @@ class TestMainFunction(unittest.TestCase):
         current_flat = flatten_test_results(self.current_data)
         reference_flat = flatten_test_results(self.reference_data)
 
-        regressions, _, _, _ = identify_test_changes(current_flat, reference_flat)
+        regressions, _, _, _, _ = identify_test_changes(current_flat, reference_flat)
 
         self.assertIn("tests/ls/test2", regressions)
 
@@ -320,7 +353,7 @@ class TestMainFunction(unittest.TestCase):
         current_flat = flatten_test_results(self.current_data)
         reference_flat = flatten_test_results(self.reference_data)
 
-        _, fixes, _, _ = identify_test_changes(current_flat, reference_flat)
+        _, fixes, _, _, _ = identify_test_changes(current_flat, reference_flat)
 
         # tests/cp/test1 and tests/cp/test2 should be fixed but tests/cp/test1 is in ignore list
         self.assertIn("tests/cp/test1", fixes)