From c266f0c375c2e60ea46046254fa7cd5fa2fe1ca2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maurycy=20Paw=C5=82owski-Wiero=C5=84ski?= Date: Tue, 5 May 2026 02:50:06 +0200 Subject: [PATCH] gh-149009: Validate `thread_count` in `profiling.sampling` binary reader (#149147) --- .../test_binary_format.py | 30 +++++++++++++++++++ ...-04-29-13-08-46.gh-issue-149009.rek3Tw.rst | 3 ++ Modules/_remote_debugging/binary_io_reader.c | 8 +++++ 3 files changed, 41 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2026-04-29-13-08-46.gh-issue-149009.rek3Tw.rst diff --git a/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py b/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py index ca6cb6befae..9cf706aa2da 100644 --- a/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py +++ b/Lib/test/test_profiling/test_sampling_profiler/test_binary_format.py @@ -3,6 +3,7 @@ import json import os import random +import struct import tempfile import unittest from collections import defaultdict @@ -941,6 +942,35 @@ class TestBinaryEdgeCases(BinaryFormatTestBase): self.assertEqual(w.total_samples, 0) +class TestBinaryFormatValidation(BinaryFormatTestBase): + """Tests for malformed binary files.""" + + HDR_OFF_THREADS = 32 + + def test_replay_rejects_more_threads_than_declared(self): + """Replay rejects files with more unique threads than the header declares.""" + threads = [ + make_thread(1, [make_frame("t1.py", 10, "t1")]), + make_thread(2, [make_frame("t2.py", 20, "t2")]), + ] + samples = [[make_interpreter(0, threads)]] + filename = self.create_binary_file(samples, compression="none") + + with open(filename, "r+b") as raw: + raw.seek(self.HDR_OFF_THREADS) + raw.write(struct.pack("=I", 1)) + + with BinaryReader(filename) as reader: + self.assertEqual(reader.get_info()["thread_count"], 1) + with self.assertRaises(ValueError) as cm: + reader.replay_samples(RawCollector()) + self.assertEqual( + str(cm.exception), + "Invalid thread count: sample data contains more unique " + "threads than declared in header (declared 1, found at least 2)", + ) + + class TestBinaryEncodings(BinaryFormatTestBase): """Tests specifically targeting different stack encodings.""" diff --git a/Misc/NEWS.d/next/Library/2026-04-29-13-08-46.gh-issue-149009.rek3Tw.rst b/Misc/NEWS.d/next/Library/2026-04-29-13-08-46.gh-issue-149009.rek3Tw.rst new file mode 100644 index 00000000000..e2f07874276 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-04-29-13-08-46.gh-issue-149009.rek3Tw.rst @@ -0,0 +1,3 @@ +Validate that :mod:`profiling.sampling` binary profiles do not contain more +unique (thread, interpreter) pairs than declared in the header. Patch by +Maurycy Pawłowski-Wieroński. diff --git a/Modules/_remote_debugging/binary_io_reader.c b/Modules/_remote_debugging/binary_io_reader.c index da3e7d55309..551530b5199 100644 --- a/Modules/_remote_debugging/binary_io_reader.c +++ b/Modules/_remote_debugging/binary_io_reader.c @@ -559,6 +559,14 @@ reader_get_or_create_thread_state(BinaryReader *reader, uint64_t thread_id, } } + if (reader->thread_state_count >= reader->thread_count) { + PyErr_Format(PyExc_ValueError, + "Invalid thread count: sample data contains more unique threads than declared in header " + "(declared %u, found at least %zu)", + reader->thread_count, reader->thread_state_count + 1); + return NULL; + } + if (!reader->thread_states) { reader->thread_state_capacity = 16; reader->thread_states = PyMem_Calloc(reader->thread_state_capacity, sizeof(ReaderThreadState));