mirror of
https://github.com/valkey-io/valkey.git
synced 2026-05-06 05:26:42 -04:00
Fix: prevent NULL dereference crash in connectSlotExportJob when target node disappears (#3596)
### Summary This PR fixes a NULL pointer dereference (SIGSEGV) in `connectSlotExportJob()` (`src/cluster_migrateslots.c`) that can crash a Valkey cluster node, causing a denial-of-service condition. ### Root Cause When `CLUSTER MIGRATESLOTS` is issued, a migration job is created with state `SLOT_EXPORT_CONNECTING`. On the next `clusterCron()` tick, `proceedWithSlotMigration()` calls `connectSlotExportJob()`, which looks up the target node via `clusterLookupNode()`. `clusterLookupNode()` can legitimately return `NULL` — for example, if the target node is removed from the cluster (e.g. via `CLUSTER FORGET`) between the time the migration job is created and the time the cron fires. This is a realistic race condition in any cluster topology change scenario. The return value was **never checked**, so the subsequent call to `getNodeDefaultReplicationPort(n)` immediately dereferences the NULL pointer, crashing the process: ```c // Before fix — vulnerable clusterNode *n = clusterLookupNode(job->target_node_name, CLUSTER_NAMELEN); int port = getNodeDefaultReplicationPort(n); // SIGSEGV if n == NULL serverLog(..., n->ip, port); // second dereference Signed-off-by: chenshi5012 <chenshi5012@163.com>
This commit is contained in:
@@ -1303,6 +1303,13 @@ void slotExportConnectHandler(connection *conn) {
|
||||
* the job as private data. */
|
||||
int connectSlotExportJob(slotMigrationJob *job) {
|
||||
clusterNode *n = clusterLookupNode(job->target_node_name, CLUSTER_NAMELEN);
|
||||
if (n == NULL) {
|
||||
serverLog(LL_WARNING,
|
||||
"Slot migration %s: target node %.40s not found in cluster, "
|
||||
"aborting connection attempt.",
|
||||
job->description, job->target_node_name);
|
||||
return C_ERR;
|
||||
}
|
||||
int port = getNodeDefaultReplicationPort(n);
|
||||
serverLog(LL_NOTICE, "Connecting slot migration %s (ip: %s, port %d)",
|
||||
job->description,
|
||||
@@ -1995,10 +2002,11 @@ void proceedWithSlotMigration(slotMigrationJob *job) {
|
||||
status = proceedWithSlotExportJobConnecting(job, &completed);
|
||||
}
|
||||
if (status == C_ERR) {
|
||||
const char *conn_err = job->conn ? connGetLastError(job->conn) : "target node not found";
|
||||
sds status_msg =
|
||||
sdscatfmt(sdsempty(),
|
||||
"Unable to connect to target node: %s",
|
||||
connGetLastError(job->conn));
|
||||
conn_err);
|
||||
finishSlotMigrationJob(job, SLOT_MIGRATION_JOB_FAILED,
|
||||
status_msg);
|
||||
sdsfree(status_msg);
|
||||
|
||||
Reference in New Issue
Block a user