mirror of
https://github.com/postgres/postgres.git
synced 2026-05-06 16:59:45 -04:00
Add errdetail() with PID and UID about source of termination signal.
When a backend is terminated via pg_terminate_backend() or an external SIGTERM, the error message now includes the sender's PID and UID as errdetail, making it easier to identify the source of unexpected terminations in multi-user environments.

On platforms that support SA_SIGINFO (Linux, FreeBSD, and most modern Unix systems), the signal handler captures si_pid and si_uid from the siginfo_t structure. On platforms without SA_SIGINFO, the detail is simply omitted.

Author: Jakub Wartak <jakub.wartak@enterprisedb.com>
Reviewed-by: Andrew Dunstan <andrew@dunslane.net>
Reviewed-by: Chao Li <1356863904@qq.com>
Discussion: https://postgr.es/m/CAKZiRmyrOWovZSdixpLd3PGMQXuQL_zw2Ght5XhHCkQ1uDsxjw@mail.gmail.com
This commit is contained in:
@@ -15797,6 +15797,48 @@ if test "$ac_cv_sizeof_off_t" -lt 8; then
|
||||
fi
|
||||
fi
|
||||
|
||||
# Check for SA_SIGINFO extended signal handler availability
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for SA_SIGINFO" >&5
|
||||
$as_echo_n "checking for SA_SIGINFO... " >&6; }
|
||||
if ${ac_cv_have_sa_siginfo+:} false; then :
|
||||
$as_echo_n "(cached) " >&6
|
||||
else
|
||||
|
||||
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
|
||||
/* end confdefs.h. */
|
||||
|
||||
|
||||
#include <signal.h>
|
||||
#include <stddef.h>
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
|
||||
struct sigaction sa;
|
||||
sa.sa_flags = SA_SIGINFO;
|
||||
|
||||
;
|
||||
return 0;
|
||||
}
|
||||
|
||||
_ACEOF
|
||||
if ac_fn_c_try_compile "$LINENO"; then :
|
||||
ac_cv_have_sa_siginfo=yes
|
||||
else
|
||||
ac_cv_have_sa_siginfo=no
|
||||
fi
|
||||
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
|
||||
|
||||
fi
|
||||
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have_sa_siginfo" >&5
|
||||
$as_echo "$ac_cv_have_sa_siginfo" >&6; }
|
||||
|
||||
if test "x$ac_cv_have_sa_siginfo" = "xyes"; then
|
||||
|
||||
$as_echo "#define HAVE_SA_SIGINFO 1" >>confdefs.h
|
||||
|
||||
fi
|
||||
|
||||
##
|
||||
## Functions, global variables
|
||||
|
||||
@@ -1817,6 +1817,24 @@ if test "$ac_cv_sizeof_off_t" -lt 8; then
|
||||
fi
|
||||
fi
|
||||
|
||||
# Check for SA_SIGINFO extended signal handler availability
|
||||
AC_CACHE_CHECK([for SA_SIGINFO], [ac_cv_have_sa_siginfo], [
|
||||
AC_COMPILE_IFELSE([
|
||||
AC_LANG_PROGRAM([[
|
||||
#include <signal.h>
|
||||
#include <stddef.h>
|
||||
]], [[
|
||||
struct sigaction sa;
|
||||
sa.sa_flags = SA_SIGINFO;
|
||||
]])
|
||||
],
|
||||
[ac_cv_have_sa_siginfo=yes],
|
||||
[ac_cv_have_sa_siginfo=no])
|
||||
])
|
||||
|
||||
if test "x$ac_cv_have_sa_siginfo" = "xyes"; then
|
||||
AC_DEFINE([HAVE_SA_SIGINFO], 1, [Define to 1 if you have SA_SIGINFO available.])
|
||||
fi
|
||||
|
||||
##
|
||||
## Functions, global variables
|
||||
|
||||
@@ -2985,6 +2985,10 @@ if cc.has_member('struct sockaddr', 'sa_len',
|
||||
cdata.set('HAVE_STRUCT_SOCKADDR_SA_LEN', 1)
|
||||
endif
|
||||
|
||||
if cc.has_header_symbol('signal.h', 'SA_SIGINFO')
|
||||
cdata.set('HAVE_SA_SIGINFO', 1)
|
||||
endif
|
||||
|
||||
if cc.has_member('struct tm', 'tm_zone',
|
||||
args: test_c_args, include_directories: postgres_inc,
|
||||
prefix: '''
|
||||
|
||||
@@ -300,10 +300,22 @@ SyncRepWaitForLSN(XLogRecPtr lsn, bool commit)
|
||||
*/
|
||||
if (ProcDiePending)
|
||||
{
|
||||
/*
|
||||
* ProcDieSenderPid/Uid are read directly from the globals here
|
||||
* rather than copied to locals first; a second SIGTERM could
|
||||
* change them between reads, but that is harmless because the
|
||||
* process is about to die anyway. The signal sender detail is
|
||||
* inlined rather than using a separate errdetail() call because
|
||||
* it must be appended to the existing detail message.
|
||||
*/
|
||||
ereport(WARNING,
|
||||
(errcode(ERRCODE_ADMIN_SHUTDOWN),
|
||||
errmsg("canceling the wait for synchronous replication and terminating connection due to administrator command"),
|
||||
errdetail("The transaction has already committed locally, but might not have been replicated to the standby.")));
|
||||
errdetail("The transaction has already committed locally, but might not have been replicated to the standby.%s",
|
||||
ProcDieSenderPid == 0 ? "" :
|
||||
psprintf("\nSignal sent by PID %d, UID %d.",
|
||||
(int) ProcDieSenderPid,
|
||||
(int) ProcDieSenderUid))));
|
||||
whereToSendOutput = DestNone;
|
||||
SyncRepCancelWait();
|
||||
break;
|
||||
|
||||
@@ -109,6 +109,14 @@ int client_connection_check_interval = 0;
|
||||
/* flags for non-system relation kinds to restrict use */
|
||||
int restrict_nonsystem_relation_kind;
|
||||
|
||||
/*
|
||||
* Include signal sender PID/UID as errdetail when available (SA_SIGINFO).
|
||||
* The caller must supply the (already-captured) pid and uid values; when pid is 0 (no sender was captured) the macro evaluates to 0 and no errdetail is added.
|
||||
*/
|
||||
#define ERRDETAIL_SIGNAL_SENDER(pid, uid) \
|
||||
((pid) == 0 ? 0 : \
|
||||
errdetail("Signal sent by PID %d, UID %d.", (int) (pid), (int) (uid)))
|
||||
|
||||
/* ----------------
|
||||
* private typedefs etc
|
||||
* ----------------
|
||||
@@ -3347,7 +3355,12 @@ ProcessInterrupts(void)
|
||||
|
||||
if (ProcDiePending)
|
||||
{
|
||||
int sender_pid = ProcDieSenderPid;
|
||||
int sender_uid = ProcDieSenderUid;
|
||||
|
||||
ProcDiePending = false;
|
||||
ProcDieSenderPid = 0;
|
||||
ProcDieSenderUid = 0;
|
||||
QueryCancelPending = false; /* ProcDie trumps QueryCancel */
|
||||
LockErrorCleanup();
|
||||
/* As in quickdie, don't risk sending to client during auth */
|
||||
@@ -3360,15 +3373,18 @@ ProcessInterrupts(void)
|
||||
else if (AmAutoVacuumWorkerProcess())
|
||||
ereport(FATAL,
|
||||
(errcode(ERRCODE_ADMIN_SHUTDOWN),
|
||||
errmsg("terminating autovacuum process due to administrator command")));
|
||||
errmsg("terminating autovacuum process due to administrator command"),
|
||||
ERRDETAIL_SIGNAL_SENDER(sender_pid, sender_uid)));
|
||||
else if (IsLogicalWorker())
|
||||
ereport(FATAL,
|
||||
(errcode(ERRCODE_ADMIN_SHUTDOWN),
|
||||
errmsg("terminating logical replication worker due to administrator command")));
|
||||
errmsg("terminating logical replication worker due to administrator command"),
|
||||
ERRDETAIL_SIGNAL_SENDER(sender_pid, sender_uid)));
|
||||
else if (IsLogicalLauncher())
|
||||
{
|
||||
ereport(DEBUG1,
|
||||
(errmsg_internal("logical replication launcher shutting down")));
|
||||
(errmsg_internal("logical replication launcher shutting down"),
|
||||
ERRDETAIL_SIGNAL_SENDER(sender_pid, sender_uid)));
|
||||
|
||||
/*
|
||||
* The logical replication launcher can be stopped at any time.
|
||||
@@ -3379,23 +3395,27 @@ ProcessInterrupts(void)
|
||||
else if (AmWalReceiverProcess())
|
||||
ereport(FATAL,
|
||||
(errcode(ERRCODE_ADMIN_SHUTDOWN),
|
||||
errmsg("terminating walreceiver process due to administrator command")));
|
||||
errmsg("terminating walreceiver process due to administrator command"),
|
||||
ERRDETAIL_SIGNAL_SENDER(sender_pid, sender_uid)));
|
||||
else if (AmBackgroundWorkerProcess())
|
||||
ereport(FATAL,
|
||||
(errcode(ERRCODE_ADMIN_SHUTDOWN),
|
||||
errmsg("terminating background worker \"%s\" due to administrator command",
|
||||
MyBgworkerEntry->bgw_type)));
|
||||
MyBgworkerEntry->bgw_type),
|
||||
ERRDETAIL_SIGNAL_SENDER(sender_pid, sender_uid)));
|
||||
else if (AmIoWorkerProcess())
|
||||
{
|
||||
ereport(DEBUG1,
|
||||
(errmsg_internal("io worker shutting down due to administrator command")));
|
||||
(errmsg_internal("io worker shutting down due to administrator command"),
|
||||
ERRDETAIL_SIGNAL_SENDER(sender_pid, sender_uid)));
|
||||
|
||||
proc_exit(0);
|
||||
}
|
||||
else
|
||||
ereport(FATAL,
|
||||
(errcode(ERRCODE_ADMIN_SHUTDOWN),
|
||||
errmsg("terminating connection due to administrator command")));
|
||||
errmsg("terminating connection due to administrator command"),
|
||||
ERRDETAIL_SIGNAL_SENDER(sender_pid, sender_uid)));
|
||||
}
|
||||
|
||||
if (CheckClientConnectionPending)
|
||||
|
||||
@@ -43,6 +43,8 @@ volatile sig_atomic_t IdleStatsUpdateTimeoutPending = false;
|
||||
volatile uint32 InterruptHoldoffCount = 0;
|
||||
volatile uint32 QueryCancelHoldoffCount = 0;
|
||||
volatile uint32 CritSectionCount = 0;
|
||||
/* PID of the sender of the last captured SIGTERM; set by the signal wrapper when SA_SIGINFO is available, 0 means "not captured" */
volatile int ProcDieSenderPid = 0;
|
||||
/* UID of the sender of the last captured SIGTERM; set together with ProcDieSenderPid and reset to 0 by ProcessInterrupts() */
volatile int ProcDieSenderUid = 0;
|
||||
|
||||
int MyProcPid;
|
||||
pg_time_t MyStartTime;
|
||||
|
||||
@@ -142,12 +142,11 @@ my ($ret, $out, $err) = $node->psql('postgres',
|
||||
is($ret, 2, 'server crash: psql exit code');
|
||||
like($out, qr/before/, 'server crash: output before crash');
|
||||
unlike($out, qr/AFTER/, 'server crash: no output after crash');
|
||||
is( $err,
|
||||
'psql:<stdin>:2: FATAL: terminating connection due to administrator command
|
||||
psql:<stdin>:2: server closed the connection unexpectedly
|
||||
like( $err, qr/psql:<stdin>:2: FATAL: terminating connection due to administrator command
|
||||
(?:DETAIL: Signal sent by PID \d+, UID \d+\.\n)?psql:<stdin>:2: server closed the connection unexpectedly
|
||||
This probably means the server terminated abnormally
|
||||
before or while processing the request.
|
||||
psql:<stdin>:2: error: connection to server was lost',
|
||||
psql:<stdin>:2: error: connection to server was lost/,
|
||||
'server crash: error message');
|
||||
|
||||
# test \errverbose
|
||||
|
||||
@@ -90,6 +90,8 @@
|
||||
extern PGDLLIMPORT volatile sig_atomic_t InterruptPending;
|
||||
extern PGDLLIMPORT volatile sig_atomic_t QueryCancelPending;
|
||||
extern PGDLLIMPORT volatile sig_atomic_t ProcDiePending;
|
||||
extern PGDLLIMPORT volatile int ProcDieSenderPid;
|
||||
extern PGDLLIMPORT volatile int ProcDieSenderUid;
|
||||
extern PGDLLIMPORT volatile sig_atomic_t IdleInTransactionSessionTimeoutPending;
|
||||
extern PGDLLIMPORT volatile sig_atomic_t TransactionTimeoutPending;
|
||||
extern PGDLLIMPORT volatile sig_atomic_t IdleSessionTimeoutPending;
|
||||
|
||||
@@ -354,6 +354,9 @@
|
||||
/* Define to 1 if you have the `rl_variable_bind' function. */
|
||||
#undef HAVE_RL_VARIABLE_BIND
|
||||
|
||||
/* Define to 1 if you have SA_SIGINFO available. */
|
||||
#undef HAVE_SA_SIGINFO
|
||||
|
||||
/* Define to 1 if you have the <security/pam_appl.h> header file. */
|
||||
#undef HAVE_SECURITY_PAM_APPL_H
|
||||
|
||||
|
||||
+32
-3
@@ -82,10 +82,19 @@ static volatile pqsigfunc pqsignal_handlers[PG_NSIG];
|
||||
*
|
||||
* This wrapper also handles restoring the value of errno.
|
||||
*/
|
||||
#if !defined(FRONTEND) && defined(HAVE_SA_SIGINFO)
|
||||
static void
|
||||
wrapper_handler(int signo, siginfo_t * info, void *context)
|
||||
#else
|
||||
static void
|
||||
wrapper_handler(SIGNAL_ARGS)
|
||||
#endif
|
||||
{
|
||||
int save_errno = errno;
|
||||
#if !defined(FRONTEND) && defined(HAVE_SA_SIGINFO)
|
||||
/* The SA_SIGINFO handler signature names its argument signo; alias it to postgres_signal_arg, the name the rest of this handler uses (as with SIGNAL_ARGS) */
|
||||
int postgres_signal_arg = signo;
|
||||
#endif
|
||||
|
||||
Assert(postgres_signal_arg > 0);
|
||||
Assert(postgres_signal_arg < PG_NSIG);
|
||||
@@ -105,6 +114,14 @@ wrapper_handler(SIGNAL_ARGS)
|
||||
raise(postgres_signal_arg);
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef HAVE_SA_SIGINFO
|
||||
if (signo == SIGTERM && info)
|
||||
{
|
||||
ProcDieSenderPid = info->si_pid;
|
||||
ProcDieSenderUid = info->si_uid;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
(*pqsignal_handlers[postgres_signal_arg]) (postgres_signal_arg);
|
||||
@@ -125,6 +142,7 @@ pqsignal(int signo, pqsigfunc func)
|
||||
#if !(defined(WIN32) && defined(FRONTEND))
|
||||
struct sigaction act;
|
||||
#endif
|
||||
bool use_wrapper = false;
|
||||
|
||||
Assert(signo > 0);
|
||||
Assert(signo < PG_NSIG);
|
||||
@@ -132,13 +150,24 @@ pqsignal(int signo, pqsigfunc func)
|
||||
if (func != SIG_IGN && func != SIG_DFL)
|
||||
{
|
||||
pqsignal_handlers[signo] = func; /* assumed atomic */
|
||||
func = wrapper_handler;
|
||||
use_wrapper = true;
|
||||
}
|
||||
|
||||
#if !(defined(WIN32) && defined(FRONTEND))
|
||||
act.sa_handler = func;
|
||||
sigemptyset(&act.sa_mask);
|
||||
act.sa_flags = SA_RESTART;
|
||||
#if !defined(FRONTEND) && defined(HAVE_SA_SIGINFO)
|
||||
if (use_wrapper)
|
||||
{
|
||||
act.sa_sigaction = wrapper_handler;
|
||||
act.sa_flags |= SA_SIGINFO;
|
||||
}
|
||||
else
|
||||
act.sa_handler = func;
|
||||
#else
|
||||
act.sa_handler = use_wrapper ? wrapper_handler : func;
|
||||
#endif
|
||||
|
||||
#ifdef SA_NOCLDSTOP
|
||||
if (signo == SIGCHLD)
|
||||
act.sa_flags |= SA_NOCLDSTOP;
|
||||
@@ -147,7 +176,7 @@ pqsignal(int signo, pqsigfunc func)
|
||||
Assert(false); /* probably indicates coding error */
|
||||
#else
|
||||
/* Forward to Windows native signal system. */
|
||||
if (signal(signo, func) == SIG_ERR)
|
||||
if (signal(signo, use_wrapper ? wrapper_handler : func) == SIG_ERR)
|
||||
Assert(false); /* probably indicates coding error */
|
||||
#endif
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user