Fix query optimization for semijoins (#4287)

# Description of Changes

Fixes a planner bug where certain join queries could fail with:
```
Could not compute positional arguments during query planning
```

The reason was that in the process of rewriting the query with
semijoins, the fallback branch of the optimization rule didn't track the
dependencies of the join correctly. In particular it didn't mark the LHS
table in the join condition as a dependency when recursing into the LHS
of the join tree. This led to the table being projected out or dropped
in the LHS subtree while still being referenced by the root of the
subtree.

This only happened for bridge tables (tables used to link or bridge
other tables in the join, but whose columns are not referenced in the
final output). If the table was referenced in the output, it would be
included in the list of dependencies from the start and so this bug
would not manifest.

# API and ABI breaking changes

None

# Expected complexity level and risk

1

# Testing

Added a sql execution test
This commit is contained in:
joshua-spacetime
2026-02-18 12:06:36 -08:00
committed by GitHub
parent f9d4d2748d
commit 8287408ca2
2 changed files with 83 additions and 1 deletions
+79
View File
@@ -1303,6 +1303,85 @@ pub(crate) mod tests {
Ok(())
}
#[test]
fn test_multi_way_join_with_bridge_tables() -> anyhow::Result<()> {
let db = TestDB::durable()?;
let orders = db.create_table_for_test(
"orders",
&[
("o_orderkey", AlgebraicType::U64),
("o_custkey", AlgebraicType::U64),
("o_orderstatus", AlgebraicType::U64),
],
&[0.into(), 1.into(), 2.into()],
)?;
let customer = db.create_table_for_test(
"customer",
&[("c_custkey", AlgebraicType::U64), ("c_nationkey", AlgebraicType::U64)],
&[0.into(), 1.into()],
)?;
let nation = db.create_table_for_test(
"nation",
&[
("n_nationkey", AlgebraicType::U64),
("n_name", AlgebraicType::String),
("n_regionkey", AlgebraicType::U64),
],
&[0.into(), 2.into()],
)?;
let region = db.create_table_for_test(
"region",
&[("r_regionkey", AlgebraicType::U64), ("r_name", AlgebraicType::String)],
&[0.into()],
)?;
insert_rows(&db, orders, [product![1u64, 10u64, 0u64], product![2u64, 20u64, 1u64]])?;
insert_rows(&db, customer, [product![10u64, 100u64], product![20u64, 200u64]])?;
insert_rows(
&db,
nation,
[
product![100u64, "NATION_A", 1000u64],
product![200u64, "NATION_B", 2000u64],
],
)?;
insert_rows(
&db,
region,
[product![1000u64, "REGION_A"], product![2000u64, "REGION_B"]],
)?;
let result_three_way = run_for_testing(
&db,
"
SELECT customer.c_custkey, nation.n_name
FROM orders
JOIN customer ON customer.c_custkey = orders.o_custkey
JOIN nation ON nation.n_nationkey = customer.c_nationkey
WHERE orders.o_orderstatus = 0",
)?;
assert_eq!(result_three_way, vec![product![10u64, "NATION_A"]]);
let result_four_way = run_for_testing(
&db,
"
SELECT customer.c_custkey, region.r_name
FROM orders
JOIN customer ON customer.c_custkey = orders.o_custkey
JOIN nation ON nation.n_nationkey = customer.c_nationkey
JOIN region ON region.r_regionkey = nation.n_regionkey
WHERE orders.o_orderstatus = 0",
)?;
assert_eq!(result_four_way, vec![product![10u64, "REGION_A"]]);
Ok(())
}
#[test]
fn test_insert() -> ResultTest<()> {
let (db, mut input) = create_data(1)?;
+4 -1
View File
@@ -842,7 +842,10 @@ impl PhysicalPlan {
Self::IxJoin(IxJoin { lhs, ..join }, Semi::Lhs)
}
Self::IxJoin(join, Semi::All) => {
let reqs = reqs.into_iter().filter(|label| label != &join.rhs_label).collect();
let mut reqs: Vec<_> = reqs.into_iter().filter(|label| label != &join.rhs_label).collect();
if !reqs.contains(&join.lhs_field.label) {
reqs.push(join.lhs_field.label);
}
let lhs = join.lhs.introduce_semijoins(reqs);
let lhs = Box::new(lhs);
Self::IxJoin(IxJoin { lhs, ..join }, Semi::All)