Add errdetail() reporting the PID and UID of the sender of a termination signal.

When a backend is terminated via pg_terminate_backend() or an external
SIGTERM, the error message now includes the sender's PID and UID as
errdetail, making it easier to identify the source of unexpected
terminations in multi-user environments.

On platforms that support SA_SIGINFO (Linux, FreeBSD, and most modern
Unix systems), the signal handler captures si_pid and si_uid from the
siginfo_t structure.  On platforms without SA_SIGINFO, the detail is
simply omitted.

Author: Jakub Wartak <jakub.wartak@enterprisedb.com>
Reviewed-by: Andrew Dunstan <andrew@dunslane.net>
Reviewed-by: Chao Li <1356863904@qq.com>
Discussion: https://postgr.es/m/CAKZiRmyrOWovZSdixpLd3PGMQXuQL_zw2Ght5XhHCkQ1uDsxjw@mail.gmail.com
This commit is contained in:
Andrew Dunstan
2026-04-06 12:39:14 -04:00
parent c10edb102a
commit 55890a9194
10 changed files with 146 additions and 15 deletions
Vendored
+42
View File
@@ -15797,6 +15797,48 @@ if test "$ac_cv_sizeof_off_t" -lt 8; then
fi
fi
# Check for SA_SIGINFO extended signal handler availability
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for SA_SIGINFO" >&5
$as_echo_n "checking for SA_SIGINFO... " >&6; }
if ${ac_cv_have_sa_siginfo+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#include <signal.h>
#include <stddef.h>
int
main ()
{
struct sigaction sa;
sa.sa_flags = SA_SIGINFO;
;
return 0;
}
_ACEOF
if ac_fn_c_try_compile "$LINENO"; then :
ac_cv_have_sa_siginfo=yes
else
ac_cv_have_sa_siginfo=no
fi
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have_sa_siginfo" >&5
$as_echo "$ac_cv_have_sa_siginfo" >&6; }
if test "x$ac_cv_have_sa_siginfo" = "xyes"; then
$as_echo "#define HAVE_SA_SIGINFO 1" >>confdefs.h
fi
##
## Functions, global variables
+18
View File
@@ -1817,6 +1817,24 @@ if test "$ac_cv_sizeof_off_t" -lt 8; then
fi
fi
# Check for SA_SIGINFO extended signal handler availability
AC_CACHE_CHECK([for SA_SIGINFO], [ac_cv_have_sa_siginfo], [
AC_COMPILE_IFELSE([
AC_LANG_PROGRAM([[
#include <signal.h>
#include <stddef.h>
]], [[
struct sigaction sa;
sa.sa_flags = SA_SIGINFO;
]])
],
[ac_cv_have_sa_siginfo=yes],
[ac_cv_have_sa_siginfo=no])
])
if test "x$ac_cv_have_sa_siginfo" = "xyes"; then
AC_DEFINE([HAVE_SA_SIGINFO], 1, [Define to 1 if you have SA_SIGINFO available.])
fi
##
## Functions, global variables
+4
View File
@@ -2985,6 +2985,10 @@ if cc.has_member('struct sockaddr', 'sa_len',
cdata.set('HAVE_STRUCT_SOCKADDR_SA_LEN', 1)
endif
# Check for SA_SIGINFO extended signal handler availability: define
# HAVE_SA_SIGINFO when <signal.h> provides the SA_SIGINFO symbol.
if cc.has_header_symbol('signal.h', 'SA_SIGINFO')
cdata.set('HAVE_SA_SIGINFO', 1)
endif
if cc.has_member('struct tm', 'tm_zone',
args: test_c_args, include_directories: postgres_inc,
prefix: '''
+13 -1
View File
@@ -300,10 +300,22 @@ SyncRepWaitForLSN(XLogRecPtr lsn, bool commit)
*/
if (ProcDiePending)
{
/*
* ProcDieSenderPid/Uid are read directly from the globals here
* rather than copied to locals first; a second SIGTERM could
* change them between reads, but that is harmless because the
* process is about to die anyway. The signal sender detail is
* inlined rather than using a separate errdetail() call because
* it must be appended to the existing detail message.
*/
ereport(WARNING,
(errcode(ERRCODE_ADMIN_SHUTDOWN),
errmsg("canceling the wait for synchronous replication and terminating connection due to administrator command"),
errdetail("The transaction has already committed locally, but might not have been replicated to the standby.")));
errdetail("The transaction has already committed locally, but might not have been replicated to the standby.%s",
ProcDieSenderPid == 0 ? "" :
psprintf("\nSignal sent by PID %d, UID %d.",
(int) ProcDieSenderPid,
(int) ProcDieSenderUid))));
whereToSendOutput = DestNone;
SyncRepCancelWait();
break;
+27 -7
View File
@@ -109,6 +109,14 @@ int client_connection_check_interval = 0;
/* flags for non-system relation kinds to restrict use */
int restrict_nonsystem_relation_kind;
/*
 * ERRDETAIL_SIGNAL_SENDER
 *		Auxiliary ereport() clause naming the process that sent a signal.
 *
 * Expands to an errdetail() call reporting the given PID and UID, or to a
 * plain 0 (adding no detail) when pid is zero, i.e. when no sender was
 * recorded.  Callers are expected to pass values they have already copied
 * out of the signal-handler globals.  Note that pid is evaluated twice.
 */
#define ERRDETAIL_SIGNAL_SENDER(pid, uid) \
	((pid) != 0 ? \
	 errdetail("Signal sent by PID %d, UID %d.", (int) (pid), (int) (uid)) : \
	 0)
/* ----------------
* private typedefs etc
* ----------------
@@ -3347,7 +3355,12 @@ ProcessInterrupts(void)
if (ProcDiePending)
{
int sender_pid = ProcDieSenderPid;
int sender_uid = ProcDieSenderUid;
ProcDiePending = false;
ProcDieSenderPid = 0;
ProcDieSenderUid = 0;
QueryCancelPending = false; /* ProcDie trumps QueryCancel */
LockErrorCleanup();
/* As in quickdie, don't risk sending to client during auth */
@@ -3360,15 +3373,18 @@ ProcessInterrupts(void)
else if (AmAutoVacuumWorkerProcess())
ereport(FATAL,
(errcode(ERRCODE_ADMIN_SHUTDOWN),
errmsg("terminating autovacuum process due to administrator command")));
errmsg("terminating autovacuum process due to administrator command"),
ERRDETAIL_SIGNAL_SENDER(sender_pid, sender_uid)));
else if (IsLogicalWorker())
ereport(FATAL,
(errcode(ERRCODE_ADMIN_SHUTDOWN),
errmsg("terminating logical replication worker due to administrator command")));
errmsg("terminating logical replication worker due to administrator command"),
ERRDETAIL_SIGNAL_SENDER(sender_pid, sender_uid)));
else if (IsLogicalLauncher())
{
ereport(DEBUG1,
(errmsg_internal("logical replication launcher shutting down")));
(errmsg_internal("logical replication launcher shutting down"),
ERRDETAIL_SIGNAL_SENDER(sender_pid, sender_uid)));
/*
* The logical replication launcher can be stopped at any time.
@@ -3379,23 +3395,27 @@ ProcessInterrupts(void)
else if (AmWalReceiverProcess())
ereport(FATAL,
(errcode(ERRCODE_ADMIN_SHUTDOWN),
errmsg("terminating walreceiver process due to administrator command")));
errmsg("terminating walreceiver process due to administrator command"),
ERRDETAIL_SIGNAL_SENDER(sender_pid, sender_uid)));
else if (AmBackgroundWorkerProcess())
ereport(FATAL,
(errcode(ERRCODE_ADMIN_SHUTDOWN),
errmsg("terminating background worker \"%s\" due to administrator command",
MyBgworkerEntry->bgw_type)));
MyBgworkerEntry->bgw_type),
ERRDETAIL_SIGNAL_SENDER(sender_pid, sender_uid)));
else if (AmIoWorkerProcess())
{
ereport(DEBUG1,
(errmsg_internal("io worker shutting down due to administrator command")));
(errmsg_internal("io worker shutting down due to administrator command"),
ERRDETAIL_SIGNAL_SENDER(sender_pid, sender_uid)));
proc_exit(0);
}
else
ereport(FATAL,
(errcode(ERRCODE_ADMIN_SHUTDOWN),
errmsg("terminating connection due to administrator command")));
errmsg("terminating connection due to administrator command"),
ERRDETAIL_SIGNAL_SENDER(sender_pid, sender_uid)));
}
if (CheckClientConnectionPending)
+2
View File
@@ -43,6 +43,8 @@ volatile sig_atomic_t IdleStatsUpdateTimeoutPending = false;
volatile uint32 InterruptHoldoffCount = 0;
volatile uint32 QueryCancelHoldoffCount = 0;
volatile uint32 CritSectionCount = 0;
/*
 * PID and UID of the process that sent the SIGTERM which set ProcDiePending.
 * They are filled in by the signal wrapper from siginfo_t when
 * HAVE_SA_SIGINFO is defined, and are read and reset to zero by
 * ProcessInterrupts().  ProcDieSenderPid stays 0 when no sender has been
 * recorded, in which case the "Signal sent by ..." detail is omitted.
 */
volatile int ProcDieSenderPid = 0;
volatile int ProcDieSenderUid = 0;
int MyProcPid;
pg_time_t MyStartTime;
+3 -4
View File
@@ -142,12 +142,11 @@ my ($ret, $out, $err) = $node->psql('postgres',
is($ret, 2, 'server crash: psql exit code');
like($out, qr/before/, 'server crash: output before crash');
unlike($out, qr/AFTER/, 'server crash: no output after crash');
is( $err,
'psql:<stdin>:2: FATAL: terminating connection due to administrator command
psql:<stdin>:2: server closed the connection unexpectedly
like( $err, qr/psql:<stdin>:2: FATAL: terminating connection due to administrator command
(?:DETAIL: Signal sent by PID \d+, UID \d+\.\n)?psql:<stdin>:2: server closed the connection unexpectedly
This probably means the server terminated abnormally
before or while processing the request.
psql:<stdin>:2: error: connection to server was lost',
psql:<stdin>:2: error: connection to server was lost/,
'server crash: error message');
# test \errverbose
+2
View File
@@ -90,6 +90,8 @@
extern PGDLLIMPORT volatile sig_atomic_t InterruptPending;
extern PGDLLIMPORT volatile sig_atomic_t QueryCancelPending;
extern PGDLLIMPORT volatile sig_atomic_t ProcDiePending;
extern PGDLLIMPORT volatile int ProcDieSenderPid;
extern PGDLLIMPORT volatile int ProcDieSenderUid;
extern PGDLLIMPORT volatile sig_atomic_t IdleInTransactionSessionTimeoutPending;
extern PGDLLIMPORT volatile sig_atomic_t TransactionTimeoutPending;
extern PGDLLIMPORT volatile sig_atomic_t IdleSessionTimeoutPending;
+3
View File
@@ -354,6 +354,9 @@
/* Define to 1 if you have the `rl_variable_bind' function. */
#undef HAVE_RL_VARIABLE_BIND
/* Define to 1 if you have SA_SIGINFO available. */
#undef HAVE_SA_SIGINFO
/* Define to 1 if you have the <security/pam_appl.h> header file. */
#undef HAVE_SECURITY_PAM_APPL_H
+32 -3
View File
@@ -82,10 +82,19 @@ static volatile pqsigfunc pqsignal_handlers[PG_NSIG];
*
* This wrapper also handles restoring the value of errno.
*/
#if !defined(FRONTEND) && defined(HAVE_SA_SIGINFO)
static void
wrapper_handler(int signo, siginfo_t * info, void *context)
#else
static void
wrapper_handler(SIGNAL_ARGS)
#endif
{
int save_errno = errno;
#if !defined(FRONTEND) && defined(HAVE_SA_SIGINFO)
/* SA_SIGINFO signature uses signo, not SIGNAL_ARGS macro */
int postgres_signal_arg = signo;
#endif
Assert(postgres_signal_arg > 0);
Assert(postgres_signal_arg < PG_NSIG);
@@ -105,6 +114,14 @@ wrapper_handler(SIGNAL_ARGS)
raise(postgres_signal_arg);
return;
}
#ifdef HAVE_SA_SIGINFO
if (signo == SIGTERM && info)
{
ProcDieSenderPid = info->si_pid;
ProcDieSenderUid = info->si_uid;
}
#endif
#endif
(*pqsignal_handlers[postgres_signal_arg]) (postgres_signal_arg);
@@ -125,6 +142,7 @@ pqsignal(int signo, pqsigfunc func)
#if !(defined(WIN32) && defined(FRONTEND))
struct sigaction act;
#endif
bool use_wrapper = false;
Assert(signo > 0);
Assert(signo < PG_NSIG);
@@ -132,13 +150,24 @@ pqsignal(int signo, pqsigfunc func)
if (func != SIG_IGN && func != SIG_DFL)
{
pqsignal_handlers[signo] = func; /* assumed atomic */
func = wrapper_handler;
use_wrapper = true;
}
#if !(defined(WIN32) && defined(FRONTEND))
act.sa_handler = func;
sigemptyset(&act.sa_mask);
act.sa_flags = SA_RESTART;
#if !defined(FRONTEND) && defined(HAVE_SA_SIGINFO)
if (use_wrapper)
{
act.sa_sigaction = wrapper_handler;
act.sa_flags |= SA_SIGINFO;
}
else
act.sa_handler = func;
#else
act.sa_handler = use_wrapper ? wrapper_handler : func;
#endif
#ifdef SA_NOCLDSTOP
if (signo == SIGCHLD)
act.sa_flags |= SA_NOCLDSTOP;
@@ -147,7 +176,7 @@ pqsignal(int signo, pqsigfunc func)
Assert(false); /* probably indicates coding error */
#else
/* Forward to Windows native signal system. */
if (signal(signo, func) == SIG_ERR)
if (signal(signo, use_wrapper ? wrapper_handler : func) == SIG_ERR)
Assert(false); /* probably indicates coding error */
#endif
}