From 8287408ca2be7b3aa4e715dfdddba857b27af899 Mon Sep 17 00:00:00 2001 From: joshua-spacetime Date: Wed, 18 Feb 2026 12:06:36 -0800 Subject: [PATCH] Fix query optimization for semijoins (#4287) # Description of Changes Fixes a planner bug where certain join queries could fail with: ``` Could not compute positional arguments during query planning ``` The reason was that in the process of rewriting the query with semijoins, the fallback branch of the optimization rule didn't track the dependencies of the join correctly. In particular it didn't mark the LHS table in the join condition as a dependency when recursing into the LHS of the join tree. This led to the table being projected out or dropped in the LHS subtree while still being referenced by the root of the subtree. This only happened for bridge tables (tables used to link or bridge other tables in the join, but whose columns are not referenced in the final output). If the table was referenced in the output, it would be included in the list of dependencies from the start and so this bug would not manifest. # API and ABI breaking changes None # Expected complexity level and risk 1 # Testing Added a sql execution test --- crates/core/src/sql/execute.rs | 79 ++++++++++++++++++++++++++++++++ crates/physical-plan/src/plan.rs | 5 +- 2 files changed, 83 insertions(+), 1 deletion(-) diff --git a/crates/core/src/sql/execute.rs b/crates/core/src/sql/execute.rs index 64b4bf8fe..f42b0c10a 100644 --- a/crates/core/src/sql/execute.rs +++ b/crates/core/src/sql/execute.rs @@ -1303,6 +1303,85 @@ pub(crate) mod tests { Ok(()) } + #[test] + fn test_multi_way_join_with_bridge_tables() -> anyhow::Result<()> { + let db = TestDB::durable()?; + + let orders = db.create_table_for_test( + "orders", + &[ + ("o_orderkey", AlgebraicType::U64), + ("o_custkey", AlgebraicType::U64), + ("o_orderstatus", AlgebraicType::U64), + ], + &[0.into(), 1.into(), 2.into()], + )?; + + let customer = db.create_table_for_test( + "customer", + &[("c_custkey", AlgebraicType::U64), ("c_nationkey", AlgebraicType::U64)], + &[0.into(), 1.into()], + )?; + + let nation = db.create_table_for_test( + "nation", + &[ + ("n_nationkey", AlgebraicType::U64), + ("n_name", AlgebraicType::String), + ("n_regionkey", AlgebraicType::U64), + ], + &[0.into(), 2.into()], + )?; + + let region = db.create_table_for_test( + "region", + &[("r_regionkey", AlgebraicType::U64), ("r_name", AlgebraicType::String)], + &[0.into()], + )?; + + insert_rows(&db, orders, [product![1u64, 10u64, 0u64], product![2u64, 20u64, 1u64]])?; + insert_rows(&db, customer, [product![10u64, 100u64], product![20u64, 200u64]])?; + insert_rows( + &db, + nation, + [ + product![100u64, "NATION_A", 1000u64], + product![200u64, "NATION_B", 2000u64], + ], + )?; + insert_rows( + &db, + region, + [product![1000u64, "REGION_A"], product![2000u64, "REGION_B"]], + )?; + + let result_three_way = run_for_testing( + &db, + " + SELECT customer.c_custkey, nation.n_name + FROM orders + JOIN customer ON customer.c_custkey = orders.o_custkey + JOIN nation ON nation.n_nationkey = customer.c_nationkey + WHERE orders.o_orderstatus = 0", + )?; + + assert_eq!(result_three_way, vec![product![10u64, "NATION_A"]]); + + let result_four_way = run_for_testing( + &db, + " + SELECT customer.c_custkey, region.r_name + FROM orders + JOIN customer ON customer.c_custkey = orders.o_custkey + JOIN nation ON nation.n_nationkey = customer.c_nationkey + JOIN region ON region.r_regionkey = nation.n_regionkey + WHERE orders.o_orderstatus = 0", + )?; + + assert_eq!(result_four_way, vec![product![10u64, "REGION_A"]]); + Ok(()) + } + #[test] fn test_insert() -> ResultTest<()> { let (db, mut input) = create_data(1)?; diff --git a/crates/physical-plan/src/plan.rs b/crates/physical-plan/src/plan.rs index 66494872b..af1c585b7 100644 --- a/crates/physical-plan/src/plan.rs +++ b/crates/physical-plan/src/plan.rs @@ -842,7 +842,10 @@ impl PhysicalPlan { Self::IxJoin(IxJoin { lhs, ..join }, Semi::Lhs) } Self::IxJoin(join, Semi::All) => { - let reqs = reqs.into_iter().filter(|label| label != &join.rhs_label).collect(); + let mut reqs: Vec<_> = reqs.into_iter().filter(|label| label != &join.rhs_label).collect(); + if !reqs.contains(&join.lhs_field.label) { + reqs.push(join.lhs_field.label); + } let lhs = join.lhs.introduce_semijoins(reqs); let lhs = Box::new(lhs); Self::IxJoin(IxJoin { lhs, ..join }, Semi::All)