postgres_fdw: Fix handling of abort-cleanup-failed connections.

As connections that failed abort cleanup can't safely be further used,
if a remote query tries to get such a connection, we reject it.
Previously, this rejection involved dropping the connection if it was
open, without accounting for the possibility of open cursors using it,
causing a server crash when such an open cursor tried to use an
already-dropped connection, as a cursor-handling function
(create_cursor, fetch_more_data, or close_cursor) was called on a freed
PGconn.  To fix, delay dropping failed connections until abort cleanup
of the main transaction, to ensure open cursors using such a connection
can safely refer to the PGconn for it.

Oversight in commit 8bf58c0d9.

Reported-by: Zhibai Song <songzhibai1234@gmail.com>
Diagnosed-by: Zhibai Song <songzhibai1234@gmail.com>
Author: Etsuro Fujita <etsuro.fujita@gmail.com>
Reviewed-by: Michael Paquier <michael@paquier.xyz>
Reviewed-by: Chao Li <li.evan.chao@gmail.com>
Reviewed-by: Matheus Alcantara <matheusssilv97@gmail.com>
Discussion: https://postgr.es/m/CAPmGK176y6JP017-Cn%2BhS9CEJx_6iVhRoYbAqzuLU4d8-XPPNg%40mail.gmail.com
Backpatch-through: 14
This commit is contained in:
Etsuro Fujita
2026-05-05 18:55:00 +09:00
parent d0ed9ad8b0
commit 648818ba38
3 changed files with 133 additions and 4 deletions
+6 -4
View File
@@ -1480,6 +1480,11 @@ pgfdw_inval_callback(Datum arg, SysCacheIdentifier cacheid, uint32 hashvalue)
* Such connections can't safely be further used. Re-establishing the
* connection would change the snapshot and roll back any writes already
* performed, so that's not an option, either. Thus, we must abort.
*
* Note: there might be open cursors that use the connection, so even if the
* connection cache entry is marked as such, we will retain it until abort
* cleanup of the main transaction, to ensure such open cursors can safely
* refer to the PGconn for the connection.
*/
static void
pgfdw_reject_incomplete_xact_state_change(ConnCacheEntry *entry)
@@ -1490,15 +1495,12 @@ pgfdw_reject_incomplete_xact_state_change(ConnCacheEntry *entry)
if (entry->conn == NULL || !entry->changing_xact_state)
return;
/* make sure this entry is inactive */
disconnect_pg_server(entry);
/* find server name to be shown in the message below */
server = GetForeignServer(entry->serverid);
ereport(ERROR,
(errcode(ERRCODE_CONNECTION_EXCEPTION),
errmsg("connection to server \"%s\" was lost",
errmsg("connection to server \"%s\" cannot be used due to abort cleanup failure",
server->servername)));
}
@@ -12977,3 +12977,79 @@ SELECT server_name,
-- Clean up
\set VERBOSITY default
RESET debug_discard_caches;
-- ===================================================================
-- test cleanup of failed connections on abort
-- ===================================================================
CREATE VIEW my_backend_pid (pid) AS SELECT pg_backend_pid();
CREATE FOREIGN TABLE remote_backend_pid (pid int)
SERVER loopback OPTIONS (table_name 'my_backend_pid');
CREATE FUNCTION wait_for_backend_termination(int) RETURNS void AS $$
BEGIN
WHILE (SELECT count(*) FROM pg_stat_activity WHERE pid = $1) > 0
LOOP
PERFORM pg_stat_clear_snapshot();
END LOOP;
END
$$ LANGUAGE plpgsql;
SET client_min_messages = 'ERROR';
BEGIN;
DECLARE c CURSOR FOR SELECT * FROM ft1 ORDER BY c1;
SAVEPOINT s;
SELECT pid AS remote_pid FROM remote_backend_pid \gset
SELECT pg_terminate_backend(:remote_pid);
pg_terminate_backend
----------------------
t
(1 row)
SELECT wait_for_backend_termination(:remote_pid);
wait_for_backend_termination
------------------------------
(1 row)
ROLLBACK TO SAVEPOINT s;
SELECT pid FROM remote_backend_pid;
ERROR: connection to server "loopback" cannot be used due to abort cleanup failure
ROLLBACK TO SAVEPOINT s;
RELEASE SAVEPOINT s;
FETCH c;
ERROR: no connection to the server
CONTEXT: remote SQL command: DECLARE c1 CURSOR FOR
SELECT "C 1", c2, c3, c4, c5, c6, c7, c8 FROM "S 1"."T 1" ORDER BY "C 1" ASC NULLS LAST
ABORT;
BEGIN;
DECLARE c CURSOR FOR SELECT * FROM ft1 ORDER BY c1;
FETCH c;
c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
----+----+-------------------+------------------------------+--------------------------+----+------------+-----
1 | 2 | 00001_trig_update | Fri Jan 02 00:00:00 1970 PST | Fri Jan 02 00:00:00 1970 | 1 | 1 | foo
(1 row)
SAVEPOINT s;
SELECT pid AS remote_pid FROM remote_backend_pid \gset
SELECT pg_terminate_backend(:remote_pid);
pg_terminate_backend
----------------------
t
(1 row)
SELECT wait_for_backend_termination(:remote_pid);
wait_for_backend_termination
------------------------------
(1 row)
ROLLBACK TO SAVEPOINT s;
SELECT pid FROM remote_backend_pid;
ERROR: connection to server "loopback" cannot be used due to abort cleanup failure
ROLLBACK TO SAVEPOINT s;
RELEASE SAVEPOINT s;
CLOSE c;
ERROR: no connection to the server
CONTEXT: remote SQL command: CLOSE c1
ABORT;
RESET client_min_messages;
DROP FUNCTION wait_for_backend_termination(int);
DROP FOREIGN TABLE remote_backend_pid;
DROP VIEW my_backend_pid;
+51
View File
@@ -4620,3 +4620,54 @@ SELECT server_name,
-- Clean up
\set VERBOSITY default
RESET debug_discard_caches;
-- ===================================================================
-- test cleanup of failed connections on abort
-- ===================================================================
CREATE VIEW my_backend_pid (pid) AS SELECT pg_backend_pid();
CREATE FOREIGN TABLE remote_backend_pid (pid int)
SERVER loopback OPTIONS (table_name 'my_backend_pid');
CREATE FUNCTION wait_for_backend_termination(int) RETURNS void AS $$
BEGIN
WHILE (SELECT count(*) FROM pg_stat_activity WHERE pid = $1) > 0
LOOP
PERFORM pg_stat_clear_snapshot();
END LOOP;
END
$$ LANGUAGE plpgsql;
SET client_min_messages = 'ERROR';
BEGIN;
DECLARE c CURSOR FOR SELECT * FROM ft1 ORDER BY c1;
SAVEPOINT s;
SELECT pid AS remote_pid FROM remote_backend_pid \gset
SELECT pg_terminate_backend(:remote_pid);
SELECT wait_for_backend_termination(:remote_pid);
ROLLBACK TO SAVEPOINT s;
SELECT pid FROM remote_backend_pid;
ROLLBACK TO SAVEPOINT s;
RELEASE SAVEPOINT s;
FETCH c;
ABORT;
BEGIN;
DECLARE c CURSOR FOR SELECT * FROM ft1 ORDER BY c1;
FETCH c;
SAVEPOINT s;
SELECT pid AS remote_pid FROM remote_backend_pid \gset
SELECT pg_terminate_backend(:remote_pid);
SELECT wait_for_backend_termination(:remote_pid);
ROLLBACK TO SAVEPOINT s;
SELECT pid FROM remote_backend_pid;
ROLLBACK TO SAVEPOINT s;
RELEASE SAVEPOINT s;
CLOSE c;
ABORT;
RESET client_min_messages;
DROP FUNCTION wait_for_backend_termination(int);
DROP FOREIGN TABLE remote_backend_pid;
DROP VIEW my_backend_pid;