gh-149009: Validate thread_count in profiling.sampling binary reader (#149147)

This commit is contained in:
Maurycy Pawłowski-Wieroński
2026-05-05 02:50:06 +02:00
committed by GitHub
parent 04ce318522
commit c266f0c375
3 changed files with 41 additions and 0 deletions
@@ -3,6 +3,7 @@
import json
import os
import random
import struct
import tempfile
import unittest
from collections import defaultdict
@@ -941,6 +942,35 @@ class TestBinaryEdgeCases(BinaryFormatTestBase):
self.assertEqual(w.total_samples, 0)
class TestBinaryFormatValidation(BinaryFormatTestBase):
    """Checks that the binary reader refuses structurally invalid files."""

    # Byte offset at which this test overwrites the thread count stored in
    # the file header.
    HDR_OFF_THREADS = 32

    def test_replay_rejects_more_threads_than_declared(self):
        """A header declaring fewer threads than the samples hold is an error."""
        # A single sample: one interpreter carrying two distinct threads.
        thread_specs = ((1, "t1.py", 10, "t1"), (2, "t2.py", 20, "t2"))
        threads = [
            make_thread(tid, [make_frame(path, lineno, func)])
            for tid, path, lineno, func in thread_specs
        ]
        filename = self.create_binary_file(
            [[make_interpreter(0, threads)]], compression="none"
        )

        # Patch the header in place so that it declares only one thread.
        declared_one = struct.pack("=I", 1)
        with open(filename, "r+b") as raw:
            raw.seek(self.HDR_OFF_THREADS)
            raw.write(declared_one)

        expected_message = (
            "Invalid thread count: sample data contains more unique "
            "threads than declared in header (declared 1, found at least 2)"
        )
        with BinaryReader(filename) as reader:
            # Confirm the patched value is what the reader reports.
            info = reader.get_info()
            self.assertEqual(info["thread_count"], 1)

            # Replaying the samples must fail with the exact message.
            with self.assertRaises(ValueError) as cm:
                collector = RawCollector()
                reader.replay_samples(collector)

            self.assertEqual(str(cm.exception), expected_message)
class TestBinaryEncodings(BinaryFormatTestBase):
"""Tests specifically targeting different stack encodings."""
@@ -0,0 +1,3 @@
Validate that :mod:`profiling.sampling` binary profiles do not contain more
unique (thread, interpreter) pairs than declared in the header. Patch by
Maurycy Pawłowski-Wieroński.
@@ -559,6 +559,14 @@ reader_get_or_create_thread_state(BinaryReader *reader, uint64_t thread_id,
}
}
if (reader->thread_state_count >= reader->thread_count) {
PyErr_Format(PyExc_ValueError,
"Invalid thread count: sample data contains more unique threads than declared in header "
"(declared %u, found at least %zu)",
reader->thread_count, reader->thread_state_count + 1);
return NULL;
}
if (!reader->thread_states) {
reader->thread_state_capacity = 16;
reader->thread_states = PyMem_Calloc(reader->thread_state_capacity, sizeof(ReaderThreadState));