Add functionality to accurately show when tests were previously skipped and are now failing

Christopher Dryden
2025-11-29 15:47:21 +00:00
parent b2d1117e4e
commit a917791cc8
2 changed files with 80 additions and 16 deletions
+36 -5
@@ -50,14 +50,14 @@ def identify_test_changes(current_flat, reference_flat):
         reference_flat (dict): Flattened dictionary of reference test results
 
     Returns:
-        tuple: Four lists containing regressions, fixes, newly_skipped, and newly_passing tests
+        tuple: Five lists containing regressions, fixes, newly_skipped, newly_passing, and newly_failing tests
     """
     # Find regressions (tests that were passing but now failing)
     regressions = []
     for test_path, status in current_flat.items():
         if status in ("FAIL", "ERROR"):
             if test_path in reference_flat:
-                if reference_flat[test_path] in ("PASS", "SKIP"):
+                if reference_flat[test_path] == "PASS":
                     regressions.append(test_path)
 
     # Find fixes (tests that were failing but now passing)
@@ -88,7 +88,17 @@ def identify_test_changes(current_flat, reference_flat):
         ):
             newly_passing.append(test_path)
 
-    return regressions, fixes, newly_skipped, newly_passing
+    # Find newly failing tests (were skipped, now failing)
+    newly_failing = []
+    for test_path, status in current_flat.items():
+        if (
+            status in ("FAIL", "ERROR")
+            and test_path in reference_flat
+            and reference_flat[test_path] == "SKIP"
+        ):
+            newly_failing.append(test_path)
+
+    return regressions, fixes, newly_skipped, newly_passing, newly_failing
 
 
 def main():
@@ -135,8 +145,8 @@ def main():
     reference_flat = flatten_test_results(reference_results)
 
     # Identify different categories of test changes
-    regressions, fixes, newly_skipped, newly_passing = identify_test_changes(
-        current_flat, reference_flat
+    regressions, fixes, newly_skipped, newly_passing, newly_failing = (
+        identify_test_changes(current_flat, reference_flat)
     )
 
     # Filter out intermittent issues from regressions
@@ -147,6 +157,10 @@ def main():
     real_fixes = [f for f in fixes if f not in ignore_list]
    intermittent_fixes = [f for f in fixes if f in ignore_list]
 
+    # Filter out intermittent issues from newly failing
+    real_newly_failing = [n for n in newly_failing if n not in ignore_list]
+    intermittent_newly_failing = [n for n in newly_failing if n in ignore_list]
+
     # Print summary stats
     print(f"Total tests in current run: {len(current_flat)}")
     print(f"Total tests in reference: {len(reference_flat)}")
@@ -156,6 +170,8 @@ def main():
     print(f"Intermittent fixes: {len(intermittent_fixes)}")
     print(f"Newly skipped tests: {len(newly_skipped)}")
     print(f"Newly passing tests (previously skipped): {len(newly_passing)}")
+    print(f"Newly failing tests (previously skipped): {len(real_newly_failing)}")
+    print(f"Intermittent newly failing: {len(intermittent_newly_failing)}")
 
     output_lines = []
 
@@ -206,6 +222,21 @@ def main():
             print(f"::notice ::{msg}", file=sys.stderr)
             output_lines.append(msg)
 
+    # Report newly failing tests (were skipped, now failing)
+    if real_newly_failing:
+        print("\nNEWLY FAILING TESTS (previously skipped):", file=sys.stderr)
+        for test in sorted(real_newly_failing):
+            msg = f"Note: The gnu test {test} was skipped on 'main' but is now failing."
+            print(f"::warning ::{msg}", file=sys.stderr)
+            output_lines.append(msg)
+
+    if intermittent_newly_failing:
+        print("\nINTERMITTENT NEWLY FAILING (ignored):", file=sys.stderr)
+        for test in sorted(intermittent_newly_failing):
+            msg = f"Skip an intermittent issue {test} (was skipped on 'main', now failing)"
+            print(f"::notice ::{msg}", file=sys.stderr)
+            output_lines.append(msg)
+
     if args.output and output_lines:
         with open(args.output, "w") as f:
             for line in output_lines:
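Taken together, the hunks above change the classification rule: a test that currently reports FAIL or ERROR only counts as a regression when the reference run PASSed it, while a reference SKIP now lands in its own newly_failing bucket. A minimal standalone sketch of that split (the helper name split_failures and the sample data are illustrative, not part of this commit):

# Minimal sketch of the classification the hunks above implement.
# split_failures is an illustrative helper, not from the commit;
# inputs are flat {test_path: status} dicts as produced by
# flatten_test_results.
def split_failures(current_flat, reference_flat):
    regressions, newly_failing = [], []
    for test_path, status in current_flat.items():
        if status in ("FAIL", "ERROR") and test_path in reference_flat:
            if reference_flat[test_path] == "PASS":
                regressions.append(test_path)  # PASS -> FAIL/ERROR
            elif reference_flat[test_path] == "SKIP":
                newly_failing.append(test_path)  # SKIP -> FAIL/ERROR
    return regressions, newly_failing


current = {"tests/ls/a": "FAIL", "tests/ls/b": "ERROR"}
reference = {"tests/ls/a": "PASS", "tests/ls/b": "SKIP"}
print(split_failures(current, reference))
# (['tests/ls/a'], ['tests/ls/b'])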
+44 -11
@@ -129,11 +129,11 @@ class TestIdentifyTestChanges(unittest.TestCase):
         }
         reference = {
             "tests/ls/test1": "PASS",
-            "tests/ls/test2": "SKIP",
+            "tests/ls/test2": "PASS",
             "tests/cp/test3": "PASS",
             "tests/cp/test4": "FAIL",
         }
-        regressions, _, _, _ = identify_test_changes(current, reference)
+        regressions, _, _, _, _ = identify_test_changes(current, reference)
         self.assertEqual(sorted(regressions), ["tests/ls/test1", "tests/ls/test2"])
 
     def test_fixes(self):
@@ -150,7 +150,7 @@ class TestIdentifyTestChanges(unittest.TestCase):
             "tests/cp/test3": "PASS",
             "tests/cp/test4": "FAIL",
         }
-        _, fixes, _, _ = identify_test_changes(current, reference)
+        _, fixes, _, _, _ = identify_test_changes(current, reference)
         self.assertEqual(sorted(fixes), ["tests/ls/test1", "tests/ls/test2"])
 
     def test_newly_skipped(self):
@@ -165,7 +165,7 @@ class TestIdentifyTestChanges(unittest.TestCase):
             "tests/ls/test2": "FAIL",
             "tests/cp/test3": "PASS",
         }
-        _, _, newly_skipped, _ = identify_test_changes(current, reference)
+        _, _, newly_skipped, _, _ = identify_test_changes(current, reference)
         self.assertEqual(newly_skipped, ["tests/ls/test1"])
 
     def test_newly_passing(self):
@@ -180,7 +180,7 @@ class TestIdentifyTestChanges(unittest.TestCase):
             "tests/ls/test2": "FAIL",
             "tests/cp/test3": "SKIP",
         }
-        _, _, _, newly_passing = identify_test_changes(current, reference)
+        _, _, _, newly_passing, _ = identify_test_changes(current, reference)
         self.assertEqual(newly_passing, ["tests/ls/test1"])
 
     def test_all_categories(self):
@@ -191,6 +191,7 @@ class TestIdentifyTestChanges(unittest.TestCase):
             "tests/cp/test3": "SKIP",  # Newly skipped
             "tests/cp/test4": "PASS",  # Newly passing
             "tests/rm/test5": "PASS",  # No change
+            "tests/rm/test6": "FAIL",  # Newly failing
         }
         reference = {
             "tests/ls/test1": "PASS",  # Regression
@@ -198,14 +199,16 @@ class TestIdentifyTestChanges(unittest.TestCase):
             "tests/cp/test3": "PASS",  # Newly skipped
             "tests/cp/test4": "SKIP",  # Newly passing
             "tests/rm/test5": "PASS",  # No change
+            "tests/rm/test6": "SKIP",  # Newly failing
         }
-        regressions, fixes, newly_skipped, newly_passing = identify_test_changes(
-            current, reference
+        regressions, fixes, newly_skipped, newly_passing, newly_failing = (
+            identify_test_changes(current, reference)
         )
         self.assertEqual(regressions, ["tests/ls/test1"])
         self.assertEqual(fixes, ["tests/ls/test2"])
         self.assertEqual(newly_skipped, ["tests/cp/test3"])
         self.assertEqual(newly_passing, ["tests/cp/test4"])
+        self.assertEqual(newly_failing, ["tests/rm/test6"])
 
     def test_new_and_removed_tests(self):
         """Test handling of tests that are only in one of the datasets."""
@@ -219,13 +222,43 @@ class TestIdentifyTestChanges(unittest.TestCase):
             "tests/ls/test2": "PASS",
             "tests/rm/old_test": "FAIL",
         }
-        regressions, fixes, newly_skipped, newly_passing = identify_test_changes(
-            current, reference
+        regressions, fixes, newly_skipped, newly_passing, newly_failing = (
+            identify_test_changes(current, reference)
        )
         self.assertEqual(regressions, ["tests/ls/test2"])
         self.assertEqual(fixes, [])
         self.assertEqual(newly_skipped, [])
         self.assertEqual(newly_passing, [])
+        self.assertEqual(newly_failing, [])
+
+    def test_newly_failing(self):
+        """Test identifying newly failing tests (SKIP -> FAIL)."""
+        current = {
+            "tests/ls/test1": "FAIL",
+            "tests/ls/test2": "ERROR",
+            "tests/cp/test3": "PASS",
+        }
+        reference = {
+            "tests/ls/test1": "SKIP",
+            "tests/ls/test2": "SKIP",
+            "tests/cp/test3": "SKIP",
+        }
+
+        _, _, _, _, newly_failing = identify_test_changes(current, reference)
+        self.assertEqual(sorted(newly_failing), ["tests/ls/test1", "tests/ls/test2"])
+
+    def test_skip_to_fail_not_regression(self):
+        """Test that SKIP -> FAIL is not counted as a regression."""
+        current = {
+            "tests/ls/test1": "FAIL",
+            "tests/ls/test2": "FAIL",
+        }
+        reference = {
+            "tests/ls/test1": "SKIP",
+            "tests/ls/test2": "PASS",
+        }
+
+        regressions, _, _, _, newly_failing = identify_test_changes(current, reference)
+        self.assertEqual(regressions, ["tests/ls/test2"])
+        self.assertEqual(newly_failing, ["tests/ls/test1"])
 
 
 class TestMainFunction(unittest.TestCase):
@@ -285,7 +318,7 @@ class TestMainFunction(unittest.TestCase):
         current_flat = flatten_test_results(self.current_data)
         reference_flat = flatten_test_results(self.reference_data)
 
-        regressions, _, _, _ = identify_test_changes(current_flat, reference_flat)
+        regressions, _, _, _, _ = identify_test_changes(current_flat, reference_flat)
 
         self.assertIn("tests/ls/test2", regressions)
 
@@ -320,7 +353,7 @@ class TestMainFunction(unittest.TestCase):
         current_flat = flatten_test_results(self.current_data)
         reference_flat = flatten_test_results(self.reference_data)
 
-        _, fixes, _, _ = identify_test_changes(current_flat, reference_flat)
+        _, fixes, _, _, _ = identify_test_changes(current_flat, reference_flat)
 
         # tests/cp/test1 and tests/cp/test2 should be fixed but tests/cp/test1 is in ignore list
         self.assertIn("tests/cp/test1", fixes)