Files
postgres/src/pl/plpython/expected/plpython_setof.out
T
Tom Lane 9bcb8a694b plpython: Use funccache.c infrastructure for procedure caching.
PL/Python set-returning functions can crash with a use-after-free when
CREATE OR REPLACE FUNCTION is executed while the SRF is mid-iteration.
The crash occurs because srfstate->savedargs is allocated in proc->mcxt,
which gets deleted when the procedure is invalidated, leaving a dangling
pointer that PLy_function_restore_args() then dereferences.

The best fix is to use reference counting to prevent destroying the
function state while it's still in use, similar to what PL/pgSQL has
done.  Rather than inventing a new wheel, this commit converts
PL/Python to use the funccache.c infrastructure.

The main challenge is that PL/Python uses SFRM_ValuePerCall for SRFs,
where the handler is called multiple times.  A naive implementation
would allow the refcount to return to zero between calls, but we need
to hang onto the original state and function body.  SQL-language
functions face the same challenge, so this commit follows the same
approach used in functions.c: maintain a per-call-site cache struct
(PLyProcedureCache) in fn_extra that holds both the pointer to the
long-lived PLyProcedure and the SRF execution state.

The use_count is incremented when we first obtain the procedure and is
decremented via a MemoryContextCallback registered on fn_mcxt; that callback
runs even during error aborts. Cleaning up the per-call SRF state needs more
care: an ExprContextCallback handles the in-query cases, since the
iterator is not guaranteed to run to completion (for example a LIMIT or a
rescan can abandon it early). But unlike SQL functions, whose resources
are released by transaction abort, PL/Python holds Python reference counts
on the iterator and saved arguments that abort will not release, and
ExprContextCallbacks are not invoked during an error abort. The
MemoryContextCallback on fn_mcxt therefore doubles as the backstop that
releases those references when a query errors out mid-iteration.

Since fn_extra is now used for PLyProcedureCache, this commit removes
use of the funcapi.h SRF infrastructure (SRF_IS_FIRSTCALL,
SRF_RETURN_NEXT, etc.) and switches to direct isDone signaling via
ReturnSetInfo, matching how SQL functions handle ValuePerCall mode.

This fixes a longstanding bug, so ideally we'd back-patch it.  But
it'd be impractical to back-patch further back than v18, where funccache.c
came in.  The patch is somewhat invasive, and the bug only arises in
very uncommon usages (which is why it evaded detection for so long).
On the whole, the risk/reward ratio for putting this into v18 doesn't
seem good, so commit to master only.

Bug: #19480
Reported-by: Andrzej Doros <adoros@starfishstorage.com>
Author: Matheus Alcantara <matheusssilv97@gmail.com>
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://postgr.es/m/19480-f1f9fdce30462fc4@postgresql.org
2026-06-21 15:08:27 -04:00

269 lines
6.0 KiB
Plaintext

--
-- Test returning SETOF
--
CREATE FUNCTION test_setof_error() RETURNS SETOF text AS $$
return 37
$$ LANGUAGE plpython3u;
SELECT test_setof_error();
ERROR: returned object cannot be iterated
DETAIL: PL/Python set-returning functions must return an iterable object.
CONTEXT: PL/Python function "test_setof_error"
CREATE FUNCTION test_setof_as_list(count integer, content text) RETURNS SETOF text AS $$
return [ content ]*count
$$ LANGUAGE plpython3u;
CREATE FUNCTION test_setof_as_tuple(count integer, content text) RETURNS SETOF text AS $$
t = ()
for i in range(count):
t += ( content, )
return t
$$ LANGUAGE plpython3u;
CREATE FUNCTION test_setof_as_set(count integer, content text) RETURNS SETOF text AS $$
s = set()
for i in range(count):
s.add(content * (i + 1) if content is not None else None)
return s
$$ LANGUAGE plpython3u;
CREATE FUNCTION test_setof_as_iterator(count integer, content text) RETURNS SETOF text AS $$
class producer:
def __init__ (self, icount, icontent):
self.icontent = icontent
self.icount = icount
def __iter__ (self):
return self
def __next__ (self):
if self.icount == 0:
raise StopIteration
self.icount -= 1
return self.icontent
return producer(count, content)
$$ LANGUAGE plpython3u;
CREATE FUNCTION test_setof_spi_in_iterator() RETURNS SETOF text AS
$$
for s in ('Hello', 'Brave', 'New', 'World'):
plpy.execute('select 1')
yield s
plpy.execute('select 2')
$$
LANGUAGE plpython3u;
-- Test set returning functions
SELECT test_setof_as_list(0, 'list');
test_setof_as_list
--------------------
(0 rows)
SELECT test_setof_as_list(1, 'list');
test_setof_as_list
--------------------
list
(1 row)
SELECT test_setof_as_list(2, 'list');
test_setof_as_list
--------------------
list
list
(2 rows)
SELECT test_setof_as_list(2, null);
test_setof_as_list
--------------------
(2 rows)
SELECT test_setof_as_tuple(0, 'tuple');
test_setof_as_tuple
---------------------
(0 rows)
SELECT test_setof_as_tuple(1, 'tuple');
test_setof_as_tuple
---------------------
tuple
(1 row)
SELECT test_setof_as_tuple(2, 'tuple');
test_setof_as_tuple
---------------------
tuple
tuple
(2 rows)
SELECT test_setof_as_tuple(2, null);
test_setof_as_tuple
---------------------
(2 rows)
SELECT * FROM test_setof_as_set(0, 'set') ORDER BY 1;
test_setof_as_set
-------------------
(0 rows)
SELECT * FROM test_setof_as_set(1, 'set') ORDER BY 1;
test_setof_as_set
-------------------
set
(1 row)
SELECT * FROM test_setof_as_set(2, 'set') ORDER BY 1;
test_setof_as_set
-------------------
set
setset
(2 rows)
SELECT * FROM test_setof_as_set(2, null) ORDER BY 1;
test_setof_as_set
-------------------
(1 row)
SELECT test_setof_as_iterator(0, 'list');
test_setof_as_iterator
------------------------
(0 rows)
SELECT test_setof_as_iterator(1, 'list');
test_setof_as_iterator
------------------------
list
(1 row)
SELECT test_setof_as_iterator(2, 'list');
test_setof_as_iterator
------------------------
list
list
(2 rows)
SELECT test_setof_as_iterator(2, null);
test_setof_as_iterator
------------------------
(2 rows)
SELECT test_setof_spi_in_iterator();
test_setof_spi_in_iterator
----------------------------
Hello
Brave
New
World
(4 rows)
-- set-returning function that modifies its parameters
CREATE OR REPLACE FUNCTION ugly(x int, lim int) RETURNS SETOF int AS $$
global x
while x <= lim:
yield x
x = x + 1
$$ LANGUAGE plpython3u;
SELECT ugly(1, 5);
ugly
------
1
2
3
4
5
(5 rows)
-- interleaved execution of such a function
SELECT ugly(1,3), ugly(7,8);
ugly | ugly
------+------
1 | 7
2 | 8
3 |
(3 rows)
-- returns set of named-composite-type tuples
CREATE OR REPLACE FUNCTION get_user_records()
RETURNS SETOF users
AS $$
return plpy.execute("SELECT * FROM users ORDER BY username")
$$ LANGUAGE plpython3u;
SELECT get_user_records();
get_user_records
----------------------
(jane,doe,j_doe,1)
(john,doe,johnd,2)
(rick,smith,slash,4)
(willem,doe,w_doe,3)
(4 rows)
SELECT * FROM get_user_records();
fname | lname | username | userid
--------+-------+----------+--------
jane | doe | j_doe | 1
john | doe | johnd | 2
rick | smith | slash | 4
willem | doe | w_doe | 3
(4 rows)
-- same, but returning set of RECORD
CREATE OR REPLACE FUNCTION get_user_records2()
RETURNS TABLE(fname text, lname text, username text, userid int)
AS $$
return plpy.execute("SELECT * FROM users ORDER BY username")
$$ LANGUAGE plpython3u;
SELECT get_user_records2();
get_user_records2
----------------------
(jane,doe,j_doe,1)
(john,doe,johnd,2)
(rick,smith,slash,4)
(willem,doe,w_doe,3)
(4 rows)
SELECT * FROM get_user_records2();
fname | lname | username | userid
--------+-------+----------+--------
jane | doe | j_doe | 1
john | doe | johnd | 2
rick | smith | slash | 4
willem | doe | w_doe | 3
(4 rows)
-- Test partial execution of a set-returning function
SELECT get_user_records2() LIMIT 2;
get_user_records2
--------------------
(jane,doe,j_doe,1)
(john,doe,johnd,2)
(2 rows)
SELECT * FROM get_user_records2() LIMIT 2;
fname | lname | username | userid
-------+-------+----------+--------
jane | doe | j_doe | 1
john | doe | johnd | 2
(2 rows)
-- A set-returning function that is invalidated mid-iteration must run to
-- completion using its original definition (bug #19480).
CREATE OR REPLACE FUNCTION self_invalidating_srf(x int) RETURNS SETOF int AS $$
for i in range(3):
if i == 1:
plpy.execute("CREATE OR REPLACE FUNCTION self_invalidating_srf(x int) "
"RETURNS SETOF int LANGUAGE plpython3u AS 'return [-1]'")
yield x + i
$$ LANGUAGE plpython3u;
SELECT self_invalidating_srf(10); -- expect 10,11,12 (original definition)
self_invalidating_srf
-----------------------
10
11
12
(3 rows)
SELECT self_invalidating_srf(10); -- expect -1 (replacement now in effect)
self_invalidating_srf
-----------------------
-1
(1 row)