mirror of
https://github.com/postgres/postgres.git
synced 2026-06-23 00:51:00 -04:00
Fix PANIC with track_functions due to concurrent drop of pgstats entries
pgstat_drop_entry_internal() generates an ERROR if facing a pgstats entry already marked as dropped. With a workload doing a lot of concurrent CALL and DROP/CREATE PROCEDURE, it could be possible for AtEOXact_PgStat_DroppedStats(), that wants to do transactional drops, to find entries that are already dropped, after a commit record has been written. In this case, ERRORs are upgraded to PANIC, taking down the server. This issue is fixed by making pgstat_drop_entry() optionally more tolerant to concurrent drops, adding to the routine a missing_ok option to make some of its callers more tolerant (spoiler: some of the callers want a strict behavior, like replication slots and backend stats). pgstat_drop_entry_internal() cannot be called anymore for an entry marked as dropped, hence its error is replaced by an assertion. Functions are handled as a special case in core; this problem could also apply to custom stats kinds depending on what an extension does. track_functions is costly when enabled (disabled by default), which is perhaps the main reason why this has not be found yet. A similar version of this patch has been proposed by Sami Imseih on a different thread for a feature in development. This version has tweaked here by me for the sake of fixing this issue. Reported-by: zhanglihui <zlh21343@163.com> Author: Sami Imseih <samimseih@gmail.com> Author: Michael Paquier <michael@paquier.xyz> Reviewed-by: Ayush Tiwari <ayushtiwari.slg01@gmail.com> Discussion: https://postgr.es/m/19520-73873648d44793cf@postgresql.org Backpatch-through: 15
This commit is contained in:
@@ -650,7 +650,7 @@ pgstat_shutdown_hook(int code, Datum arg)
|
||||
dlist_init(&pgStatPending);
|
||||
|
||||
/* drop the backend stats entry */
|
||||
if (!pgstat_drop_entry(PGSTAT_KIND_BACKEND, InvalidOid, MyProcNumber))
|
||||
if (!pgstat_drop_entry(PGSTAT_KIND_BACKEND, InvalidOid, MyProcNumber, false))
|
||||
pgstat_request_entry_refs_gc();
|
||||
|
||||
pgstat_detach_shmem();
|
||||
|
||||
@@ -113,7 +113,7 @@ pgstat_init_function_usage(FunctionCallInfo fcinfo,
|
||||
if (!SearchSysCacheExists1(PROCOID, ObjectIdGetDatum(fcinfo->flinfo->fn_oid)))
|
||||
{
|
||||
pgstat_drop_entry(PGSTAT_KIND_FUNCTION, MyDatabaseId,
|
||||
fcinfo->flinfo->fn_oid);
|
||||
fcinfo->flinfo->fn_oid, true);
|
||||
ereport(ERROR, errcode(ERRCODE_UNDEFINED_FUNCTION),
|
||||
errmsg("function call to dropped function"));
|
||||
}
|
||||
|
||||
@@ -188,7 +188,7 @@ pgstat_drop_replslot(ReplicationSlot *slot)
|
||||
Assert(LWLockHeldByMeInMode(ReplicationSlotAllocationLock, LW_EXCLUSIVE));
|
||||
|
||||
if (!pgstat_drop_entry(PGSTAT_KIND_REPLSLOT, InvalidOid,
|
||||
ReplicationSlotIndex(slot)))
|
||||
ReplicationSlotIndex(slot), false))
|
||||
pgstat_request_entry_refs_gc();
|
||||
}
|
||||
|
||||
|
||||
@@ -917,14 +917,7 @@ pgstat_drop_entry_internal(PgStatShared_HashEntry *shent,
|
||||
* Signal that the entry is dropped - this will eventually cause other
|
||||
* backends to release their references.
|
||||
*/
|
||||
if (shent->dropped)
|
||||
elog(ERROR,
|
||||
"trying to drop stats entry already dropped: kind=%s dboid=%u objid=%" PRIu64 " refcount=%u generation=%u",
|
||||
pgstat_get_kind_info(shent->key.kind)->name,
|
||||
shent->key.dboid,
|
||||
shent->key.objid,
|
||||
pg_atomic_read_u32(&shent->refcount),
|
||||
pg_atomic_read_u32(&shent->generation));
|
||||
Assert(!shent->dropped);
|
||||
shent->dropped = true;
|
||||
|
||||
/* release refcount marking entry as not dropped */
|
||||
@@ -1000,13 +993,16 @@ pgstat_drop_database_and_contents(Oid dboid)
|
||||
* This routine returns false if the stats entry of the dropped object could
|
||||
* not be freed, true otherwise.
|
||||
*
|
||||
* If missing_ok is true, skip entries that have been concurrently dropped.
|
||||
*
|
||||
* The callers of this function should call pgstat_request_entry_refs_gc()
|
||||
* if the stats entry could not be freed, to ensure that this entry's memory
|
||||
* can be reclaimed later by a different backend calling
|
||||
* pgstat_gc_entry_refs().
|
||||
*/
|
||||
bool
|
||||
pgstat_drop_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
|
||||
pgstat_drop_entry(PgStat_Kind kind, Oid dboid, uint64 objid,
|
||||
bool missing_ok)
|
||||
{
|
||||
PgStat_HashKey key = {0};
|
||||
PgStatShared_HashEntry *shent;
|
||||
@@ -1031,6 +1027,20 @@ pgstat_drop_entry(PgStat_Kind kind, Oid dboid, uint64 objid)
|
||||
shent = dshash_find(pgStatLocal.shared_hash, &key, true);
|
||||
if (shent)
|
||||
{
|
||||
if (shent->dropped)
|
||||
{
|
||||
if (!missing_ok)
|
||||
elog(ERROR,
|
||||
"trying to drop stats entry already dropped: kind=%s dboid=%u objid=%" PRIu64 " refcount=%u generation=%u",
|
||||
pgstat_get_kind_info(shent->key.kind)->name,
|
||||
shent->key.dboid,
|
||||
shent->key.objid,
|
||||
pg_atomic_read_u32(&shent->refcount),
|
||||
pg_atomic_read_u32(&shent->generation));
|
||||
dshash_release_lock(pgStatLocal.shared_hash, shent);
|
||||
return true;
|
||||
}
|
||||
|
||||
freed = pgstat_drop_entry_internal(shent, NULL);
|
||||
|
||||
/*
|
||||
|
||||
@@ -85,7 +85,7 @@ AtEOXact_PgStat_DroppedStats(PgStat_SubXactStatus *xact_state, bool isCommit)
|
||||
* Transaction that dropped an object committed. Drop the stats
|
||||
* too.
|
||||
*/
|
||||
if (!pgstat_drop_entry(it->kind, it->dboid, objid))
|
||||
if (!pgstat_drop_entry(it->kind, it->dboid, objid, true))
|
||||
not_freed_count++;
|
||||
}
|
||||
else if (!isCommit && pending->is_create)
|
||||
@@ -94,7 +94,7 @@ AtEOXact_PgStat_DroppedStats(PgStat_SubXactStatus *xact_state, bool isCommit)
|
||||
* Transaction that created an object aborted. Drop the stats
|
||||
* associated with the object.
|
||||
*/
|
||||
if (!pgstat_drop_entry(it->kind, it->dboid, objid))
|
||||
if (!pgstat_drop_entry(it->kind, it->dboid, objid, true))
|
||||
not_freed_count++;
|
||||
}
|
||||
|
||||
@@ -160,7 +160,7 @@ AtEOSubXact_PgStat_DroppedStats(PgStat_SubXactStatus *xact_state,
|
||||
* Subtransaction creating a new stats object aborted. Drop the
|
||||
* stats object.
|
||||
*/
|
||||
if (!pgstat_drop_entry(it->kind, it->dboid, objid))
|
||||
if (!pgstat_drop_entry(it->kind, it->dboid, objid, true))
|
||||
not_freed_count++;
|
||||
pfree(pending);
|
||||
}
|
||||
@@ -323,7 +323,7 @@ pgstat_execute_transactional_drops(int ndrops, struct xl_xact_stats_item *items,
|
||||
xl_xact_stats_item *it = &items[i];
|
||||
uint64 objid = ((uint64) it->objid_hi) << 32 | it->objid_lo;
|
||||
|
||||
if (!pgstat_drop_entry(it->kind, it->dboid, objid))
|
||||
if (!pgstat_drop_entry(it->kind, it->dboid, objid, true))
|
||||
not_freed_count++;
|
||||
}
|
||||
|
||||
|
||||
@@ -807,7 +807,8 @@ extern PgStat_EntryRef *pgstat_get_entry_ref(PgStat_Kind kind, Oid dboid, uint64
|
||||
extern bool pgstat_lock_entry(PgStat_EntryRef *entry_ref, bool nowait);
|
||||
extern bool pgstat_lock_entry_shared(PgStat_EntryRef *entry_ref, bool nowait);
|
||||
extern void pgstat_unlock_entry(PgStat_EntryRef *entry_ref);
|
||||
extern bool pgstat_drop_entry(PgStat_Kind kind, Oid dboid, uint64 objid);
|
||||
extern bool pgstat_drop_entry(PgStat_Kind kind, Oid dboid, uint64 objid,
|
||||
bool missing_ok);
|
||||
extern void pgstat_drop_all_entries(void);
|
||||
extern void pgstat_drop_matching_entries(bool (*do_drop) (PgStatShared_HashEntry *, Datum),
|
||||
Datum match_data);
|
||||
|
||||
@@ -600,7 +600,7 @@ test_custom_stats_var_drop(PG_FUNCTION_ARGS)
|
||||
|
||||
/* Drop entry and request GC if the entry could not be freed */
|
||||
if (!pgstat_drop_entry(PGSTAT_KIND_TEST_CUSTOM_VAR_STATS, InvalidOid,
|
||||
PGSTAT_CUSTOM_VAR_STATS_IDX(stat_name)))
|
||||
PGSTAT_CUSTOM_VAR_STATS_IDX(stat_name), false))
|
||||
pgstat_request_entry_refs_gc();
|
||||
|
||||
PG_RETURN_VOID();
|
||||
|
||||
Reference in New Issue
Block a user