Implement native multi-table reflection API for the mssql dialect

### Description

Adds 5 native `get_multi_*` reflection methods (columns, pk, fk, indexes, table_comment) for the MSSQL dialect, replacing the per-table loop in `_default_multi_reflect`. Single-table methods now delegate to the multi versions (PG/Oracle pattern); legacy per-table SQL is retained as `_internal_get_*` helpers, used only for tempdb reflection.

Not implemented here: `get_multi_unique_constraints`, `get_multi_check_constraints`, `get_multi_table_options` — MSSQL has no single-table counterparts to delegate from. Happy to add as a follow-up.

### Performance

Measured with `test/perf/many_table_reflection.py` against SQL Server 2022 (Docker, localhost, pyodbc + ODBC Driver 18) on a 250-table fixture, 15-50 cols, with PKs/FKs/indexes/comments:

| | single | multi | speedup |
|---|---|---|---|
| `get_columns` | 1.33s | 0.35s | 3.8x |
| `get_pk_constraint` | 1.41s | 0.08s | 18x |
| `get_foreign_keys` | 7.07s | 0.19s | 37x |
| `get_indexes` | 0.80s | 0.09s | 8.6x |
| `get_table_comment` | 0.71s | 0.05s | 14x |
| **MetaData.reflect** | **12.62s** | **1.15s** | **11x** |

### Checklist

This pull request is:

- [x] A new feature implementation
	- Fixes: #8430
	- Tests added in `test/dialect/mssql/test_reflection.py`
	- Changelog entry: `doc/build/changelog/unreleased_21/8430.rst`

Closes: #13297
Pull-request: https://github.com/sqlalchemy/sqlalchemy/pull/13297
Pull-request-sha: 2c6c69f159

Change-Id: I525c60fc5ece94dd250f376b05b64b09e65ca0d7
This commit is contained in:
Gaurav Sharma
2026-06-03 08:52:09 -04:00
committed by Federico Caselli
parent 4fb459aaf0
commit c84c7b2ffc
4 changed files with 1405 additions and 430 deletions
+17
View File
@@ -0,0 +1,17 @@
.. change::
:tags: performance, mssql, reflection
:tickets: 8430
Implemented native multi-table reflection methods for the SQL Server
dialect, providing :meth:`.MSDialect.get_multi_columns`,
:meth:`.MSDialect.get_multi_pk_constraint`,
:meth:`.MSDialect.get_multi_foreign_keys`,
:meth:`.MSDialect.get_multi_indexes` and
:meth:`.MSDialect.get_multi_table_comment`. Previously the SQL Server
dialect relied on the default implementation
which calls the per-table methods in a loop; the new implementations
issue a single bulk query per object type against the ``sys.*``
catalog views, avoiding the per-table round trips. The single-table
reflection methods are now thin wrappers over the multi-table ones,
matching the pattern used by the PostgreSQL and Oracle dialects.
Pull request courtesy Gaurav Sharma.
File diff suppressed because it is too large Load Diff
@@ -282,3 +282,64 @@ extended_properties = Table(
Column("value", NVarcharSqlVariant),
schema="sys",
)
# Table construct describing the ``sys.schemas`` catalog view, attached to
# the ``ischema`` collection.  Only the columns listed here are declared;
# any other column of the view is not available through this construct.
sys_schemas = Table(
    "schemas",
    ischema,
    Column("schema_id", Integer),
    Column("name", CoerceUnicode),
    Column("principal_id", Integer),
    schema="sys",
)
# Table construct describing the ``sys.objects`` catalog view, attached to
# the ``ischema`` collection.  ``schema_id`` presumably pairs with
# ``sys.schemas.schema_id`` -- TODO confirm against the queries that use it.
sys_objects = Table(
    "objects",
    ischema,
    Column("object_id", Integer),
    Column("name", CoerceUnicode),
    Column("schema_id", Integer),
    Column("parent_object_id", Integer),
    Column("type", String),  # CHAR(2)
    Column("type_desc", CoerceUnicode),
    schema="sys",
)
# Table construct describing the ``sys.key_constraints`` catalog view,
# attached to the ``ischema`` collection.  ``unique_index_id`` presumably
# pairs with ``sys.indexes.index_id`` for the owning object -- TODO confirm
# against the queries that use it.
sys_key_constraints = Table(
    "key_constraints",
    ischema,
    Column("object_id", Integer),
    Column("name", CoerceUnicode),
    Column("schema_id", Integer),
    Column("parent_object_id", Integer),
    Column("type", String),  # CHAR(2) ('PK', 'UQ')
    Column("unique_index_id", Integer),
    schema="sys",
)
# Table construct describing the ``sys.indexes`` catalog view, attached to
# the ``ischema`` collection.  The three ``is_*`` flag columns are declared
# as Boolean and ``type`` as Integer (see the inline note for its values).
sys_indexes = Table(
    "indexes",
    ischema,
    Column("object_id", Integer),
    Column("index_id", Integer),
    Column("name", CoerceUnicode),
    Column("type", Integer),  # TINYINT: 1=clustered, 2=nonclustered
    Column("type_desc", CoerceUnicode),
    Column("is_unique", Boolean),
    Column("is_primary_key", Boolean),
    Column("is_unique_constraint", Boolean),
    Column("filter_definition", CoerceUnicode),
    schema="sys",
)
# Table construct describing the ``sys.index_columns`` catalog view,
# attached to the ``ischema`` collection.  ``object_id`` / ``index_id``
# presumably pair with the same-named columns of ``sys.indexes`` -- TODO
# confirm against the queries that use it.
sys_index_columns = Table(
    "index_columns",
    ischema,
    Column("object_id", Integer),
    Column("index_id", Integer),
    Column("index_column_id", Integer),
    Column("column_id", Integer),
    Column("key_ordinal", Integer),
    Column("is_descending_key", Boolean),
    Column("is_included_column", Boolean),
    schema="sys",
)
+173
View File
@@ -22,6 +22,8 @@ from sqlalchemy import types as sqltypes
from sqlalchemy.dialects import mssql
from sqlalchemy.dialects.mssql import base
from sqlalchemy.dialects.mssql.information_schema import tables
from sqlalchemy.engine import ObjectKind
from sqlalchemy.engine import ObjectScope
from sqlalchemy.pool import NullPool
from sqlalchemy.schema import CreateIndex
from sqlalchemy.testing import AssertsCompiledSQL
@@ -293,6 +295,177 @@ class ReflectionTest(fixtures.TestBase, ComparesTables, AssertsCompiledSQL):
[(2, "bar", datetime.datetime(2020, 2, 2, 2, 2, 2))],
)
def test_get_multi_columns_temp_table(self, metadata, connection):
    """Call ``get_multi_columns`` directly for a local temp table.

    The same ``#``-prefixed name is requested under four scope/kind
    combinations: the table is expected back for ``ANY``/``TABLE`` and
    ``TEMPORARY``/``TABLE``, and an empty result is expected for
    ``DEFAULT``/``TABLE`` and ``ANY``/``VIEW``.

    Regression coverage for the case where a ``#``-prefixed name was
    passed to a multi method outside the autoload-shaped
    ``scope=ANY/kind=ANY`` call.

    """
    temp_name = "#mr_tmp"
    temp_key = (None, temp_name)

    Table(
        temp_name,
        metadata,
        Column("id", Integer, primary_key=True),
        Column("name", mssql.NVARCHAR(50)),
    ).create(connection)
    inspector = inspect(connection)

    def reflect(scope, kind):
        # one reflection round trip for the temp table under the
        # given scope / kind filters
        return dict(
            inspector.get_multi_columns(
                filter_names=[temp_name], scope=scope, kind=kind
            )
        )

    # ANY scope, TABLE kind: the temp table comes back with its columns
    any_scope = reflect(ObjectScope.ANY, ObjectKind.TABLE)
    eq_(set(any_scope), {temp_key})
    eq_([col["name"] for col in any_scope[temp_key]], ["id", "name"])

    # TEMPORARY scope: the temp table comes back as well
    temp_scope = reflect(ObjectScope.TEMPORARY, ObjectKind.TABLE)
    eq_(set(temp_scope), {temp_key})

    # DEFAULT scope: the temp table must be excluded
    eq_(reflect(ObjectScope.DEFAULT, ObjectKind.TABLE), {})

    # VIEW kind: the temp table must be excluded (no temp views on mssql)
    eq_(reflect(ObjectScope.ANY, ObjectKind.VIEW), {})
def test_get_multi_pk_constraint_temp_table(self, metadata, connection):
    """``get_multi_pk_constraint`` returns an entry for a temp table.

    Only the presence of the ``(None, "#mr_pk_tmp")`` key is asserted;
    the contents of the reflected constraint are not inspected.

    """
    temp_name = "#mr_pk_tmp"

    Table(
        temp_name,
        metadata,
        Column("id", Integer, primary_key=True),
        Column("val", Integer),
    ).create(connection)

    reflected = dict(
        inspect(connection).get_multi_pk_constraint(
            filter_names=[temp_name],
            scope=ObjectScope.ANY,
            kind=ObjectKind.TABLE,
        )
    )
    in_((None, temp_name), reflected)
def test_temp_reflection_does_not_leak_translate_map(
    self, metadata, connection
):
    """Temp table reflection leaves the connection's options untouched.

    The unified multi reflection path runs the tempdb pass with
    ``schema_translate_map={"sys": "tempdb.sys"}`` as an execute-level
    option.  Applying it on the connection instead (e.g. via
    ``connection.execution_options(...)``) mutates the connection in
    place and poisons subsequent Core sys.* queries, so the execution
    options are snapshotted before and after reflection and compared.

    """
    temp_name = "#mr_leak_tmp"

    Table(
        temp_name,
        metadata,
        Column("id", Integer, primary_key=True),
        Column("data", mssql.NVARCHAR(50)),
    ).create(connection)

    options_before = dict(connection.get_execution_options())

    inspector = inspect(connection)
    # Each of these would route via the tempdb pass.
    # NOTE(review): get_multi_foreign_keys is not exercised here --
    # confirm whether that omission is intentional.
    reflectors = (
        inspector.get_multi_columns,
        inspector.get_multi_pk_constraint,
        inspector.get_multi_indexes,
        inspector.get_multi_table_comment,
    )
    for reflector in reflectors:
        reflector(
            filter_names=[temp_name],
            scope=ObjectScope.ANY,
            kind=ObjectKind.TABLE,
        )

    options_after = dict(connection.get_execution_options())
    eq_(options_before, options_after)
def test_temp_reflection_with_caller_translate_map(
    self, metadata, connection
):
    """Temp table reflection survives a caller-supplied ``"sys"`` mapping.

    The connection used for inspection carries its own
    ``schema_translate_map`` containing a ``"sys"`` key.

    Statement-level execution options DO NOT override connection
    options for ``schema_translate_map`` (the connection wins during
    option merge).  Execute-level options DO override, so the tempdb
    pass must pass its map at execute time, not on the statement or
    the connection.

    """
    temp_name = "#mr_hostile_map"
    temp_key = (None, temp_name)

    Table(
        temp_name,
        metadata,
        Column("id", Integer, primary_key=True),
        Column("val", Integer),
    ).create(connection)

    # Hostile setup: the caller has installed a translate map with a
    # "sys" key for their own purposes.  The temp pass must still
    # reach tempdb.sys.* despite this.
    hostile_conn = connection.execution_options(
        schema_translate_map={"sys": "INFORMATION_SCHEMA"}
    )

    reflected = dict(
        inspect(hostile_conn).get_multi_columns(
            filter_names=[temp_name],
            scope=ObjectScope.ANY,
            kind=ObjectKind.TABLE,
        )
    )
    eq_(set(reflected), {temp_key})
    eq_([col["name"] for col in reflected[temp_key]], ["id", "val"])
@testing.combinations(
("local_temp", "#tmp", True),
("global_temp", "##tmp", True),