[3.15] gh-150599: Prevent bz2 decompressor reuse after errors (GH-150600)

(cherry picked from commit 5755d0f083)

Co-authored-by: Stan Ulbrych <stan@python.org>
This commit is contained in:
Miss Islington (bot)
2026-06-07 18:48:30 +02:00
committed by GitHub
parent a642d1ab38
commit d3ca26983d
3 changed files with 33 additions and 3 deletions
+15
View File
@@ -1032,6 +1032,21 @@ class BZ2DecompressorTest(BaseTest):
# Previously, a second call could crash due to internal inconsistency
self.assertRaises(Exception, bzd.decompress, self.BAD_DATA * 30)
def test_decompress_after_data_error(self):
    """Check that a decompressor rejects reuse after a data error.

    The first decompress() call is fed a corrupt stream and must fail
    with OSError.  Afterwards the object must report that it neither
    needs more input nor reached end-of-stream, and any further
    decompress() call must raise ValueError.
    """
    corrupt_stream = bytes.fromhex(
        "425a6839314159265359000000000000007fffff000000000000000000000000"
        "00000000000000000000000000000000000000e0370000000000000000000000"
        "000000000000000000000000000000000000000000000000000083f3"
    )
    decompressor = BZ2Decompressor()
    self.assertRaisesRegex(
        OSError, "Invalid data stream",
        decompressor.decompress, corrupt_stream,
    )

    # State exposed to callers after the failed call.
    self.assertFalse(decompressor.needs_input)
    self.assertFalse(decompressor.eof)

    # Previously, a second call could crash due to internal inconsistency;
    # it is now expected to be refused with a ValueError.
    self.assertRaisesRegex(
        ValueError, "previous error",
        decompressor.decompress, b'\x00' * 18,
    )
@support.refcount_test
def test_refleaks_in___init__(self):
gettotalrefcount = support.get_attribute(sys, 'gettotalrefcount')
@@ -0,0 +1,3 @@
Fix a possible stack buffer overflow in :mod:`bz2` when a
:class:`bz2.BZ2Decompressor` is reused after a decompression error.
The decompressor now becomes unusable after libbz2 reports an error.
+15 -3
View File
@@ -108,6 +108,7 @@ typedef struct {
typedef struct {
PyObject_HEAD
bz_stream bzs;
int bzerror;
char eof; /* Py_T_BOOL expects a char */
PyObject *unused_data;
char needs_input;
@@ -435,8 +436,11 @@ decompress_buf(BZ2Decompressor *d, Py_ssize_t max_length)
d->bzs_avail_in_real += bzs->avail_in;
if (catch_bz2_error(bzret))
if (catch_bz2_error(bzret)) {
d->bzerror = bzret;
FT_ATOMIC_STORE_CHAR_RELAXED(d->needs_input, 0);
goto error;
}
if (bzret == BZ_STREAM_END) {
FT_ATOMIC_STORE_CHAR_RELAXED(d->eof, 1);
break;
@@ -607,10 +611,17 @@ _bz2_BZ2Decompressor_decompress_impl(BZ2Decompressor *self, Py_buffer *data,
PyObject *result = NULL;
PyMutex_Lock(&self->mutex);
if (self->eof)
if (self->eof) {
PyErr_SetString(PyExc_EOFError, "End of stream already reached");
else
}
else if (self->bzerror) {
// Re-entering BZ2_bzDecompress() after an error can write out of bounds.
PyErr_SetString(PyExc_ValueError,
"Decompressor is unusable after a previous error");
}
else {
result = decompress(self, data->buf, data->len, max_length);
}
PyMutex_Unlock(&self->mutex);
return result;
}
@@ -638,6 +649,7 @@ _bz2_BZ2Decompressor_impl(PyTypeObject *type)
}
self->mutex = (PyMutex){0};
self->bzerror = 0;
self->needs_input = 1;
self->bzs_avail_in_real = 0;
self->input_buffer = NULL;