From d75146456fa5ebcb50a084cd4e2db61ab108c233 Mon Sep 17 00:00:00 2001 From: Michael Paquier Date: Wed, 17 Jun 2026 08:42:13 +0900 Subject: [PATCH] Fix another instability in recovery TAP test 004_timeline_switch The test did not wait for the standbys to be connected to the primary. This breaks one assumption at the beginning of the test, where the primary is stopped to ensure that all its records are flushed to both standbys before moving on with its next steps. If standby_1 finishes ahead of standby_2, the test would be able to work fine as the former waits for the latter. The opposite is not true: standby_2 getting ahead of standby_1 would cause the test to fail on timeout when standby_1 attempts to connect to standby_2. This commit adds an additional polling query after the two standbys are started, checking that both standbys are connected to the primary before proceeding with the initial steps of the test. Like 7185eddf0522, backpatch down to v14. Author: Sergey Tatarintsev Reviewed-by: Ewan Young Discussion: https://postgr.es/m/fea4190e-f8b5-4432-a52d-bcbee5f34366@postgrespro.ru Backpatch-through: 14 --- src/test/recovery/t/004_timeline_switch.pl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/test/recovery/t/004_timeline_switch.pl b/src/test/recovery/t/004_timeline_switch.pl index bbff6e97cd3..ef994854730 100644 --- a/src/test/recovery/t/004_timeline_switch.pl +++ b/src/test/recovery/t/004_timeline_switch.pl @@ -33,6 +33,10 @@ $node_standby_2->init_from_backup($node_primary, $backup_name, has_streaming => 1); $node_standby_2->start; +# Wait for standby_1 and standby_2 connection to the primary. +$node_primary->poll_query_until('postgres', + "SELECT count(1) = 2 FROM pg_stat_replication"); + # Create some content on primary $node_primary->safe_psql('postgres', "CREATE TABLE tab_int AS SELECT generate_series(1,1000) AS a");