- Added new option to :func:.relationship `distinct_target_key`.

This enables the subquery eager loader strategy to apply a DISTINCT
to the innermost SELECT subquery, to assist in the case where
duplicate rows are generated by the innermost query which corresponds
to this relationship (there's not yet a general solution to the issue
of dupe rows within subquery eager loading, however, when joins outside
of the innermost subquery produce dupes).  When the flag
is set to ``True``, the DISTINCT is rendered unconditionally, and when
it is set to ``None``, DISTINCT is rendered if the innermost relationship
targets columns that do not comprise a full primary key.
The option defaults to False in 0.8 (e.g. off by default in all cases),
None in 0.9 (e.g. automatic by default).   Thanks to Alexander Koval
for help with this. [ticket:2836]
This commit is contained in:
Mike Bayer
2013-10-13 16:42:32 -04:00
parent 366e74b1be
commit f00544a589
4 changed files with 257 additions and 5 deletions
+19
View File
@@ -10,6 +10,25 @@
.. changelog::
:version: 0.8.3
.. change::
:tags: feature, orm
:tickets: 2836
:versions: 0.9.0
Added new option to :func:`.relationship` ``distinct_target_key``.
This enables the subquery eager loader strategy to apply a DISTINCT
to the innermost SELECT subquery, to assist in the case where
duplicate rows are generated by the innermost query which corresponds
to this relationship (there's not yet a general solution to the issue
of dupe rows within subquery eager loading, however, when joins outside
of the innermost subquery produce dupes). When the flag
is set to ``True``, the DISTINCT is rendered unconditionally, and when
it is set to ``None``, DISTINCT is rendered if the innermost relationship
targets columns that do not comprise a full primary key.
The option defaults to False in 0.8 (e.g. off by default in all cases),
None in 0.9 (e.g. automatic by default). Thanks to Alexander
for help with this.
.. change::
:tags: bug, mysql
:tickets: 2515
+23
View File
@@ -103,6 +103,7 @@ class RelationshipProperty(StrategizedProperty):
enable_typechecks=True, join_depth=None,
comparator_factory=None,
single_parent=False, innerjoin=False,
distinct_target_key=None,
doc=None,
active_history=False,
cascade_backrefs=True,
@@ -372,6 +373,27 @@ class RelationshipProperty(StrategizedProperty):
or when the reference is one-to-one or a collection that is guaranteed
to have one or at least one entry.
:param distinct_target_key=None:
Indicate if a "subquery" eager load should apply the DISTINCT
keyword to the innermost SELECT statement. When left as ``None``,
the DISTINCT keyword will be applied in those cases when the target
columns do not comprise the full primary key of the target table.
When set to ``True``, the DISTINCT keyword is applied to the innermost
SELECT unconditionally.
It may be desirable to set this flag to False when the DISTINCT is
reducing performance of the innermost subquery beyond that of what
duplicate innermost rows may be causing.
.. versionadded:: 0.8.3 - distinct_target_key allows the
subquery eager loader to apply a DISTINCT modifier to the
innermost SELECT.
.. versionchanged:: 0.9.0 - distinct_target_key now defaults to
``None``, so that the feature enables itself automatically for
those cases where the innermost query targets a non-unique
key.
:param join_depth:
when non-``None``, an integer value indicating how many levels
deep "eager" loaders should join on a self-referring or cyclical
@@ -621,6 +643,7 @@ class RelationshipProperty(StrategizedProperty):
self.enable_typechecks = enable_typechecks
self.query_class = query_class
self.innerjoin = innerjoin
self.distinct_target_key = distinct_target_key
self.doc = doc
self.active_history = active_history
self.join_depth = join_depth
+21 -5
View File
@@ -692,7 +692,7 @@ class SubqueryLoader(AbstractRelationshipLoader):
elif subq_path.contains_mapper(self.mapper):
return
subq_mapper, leftmost_mapper, leftmost_attr = \
subq_mapper, leftmost_mapper, leftmost_attr, leftmost_relationship = \
self._get_leftmost(subq_path)
orig_query = context.attributes.get(
@@ -703,7 +703,8 @@ class SubqueryLoader(AbstractRelationshipLoader):
# produce a subquery from it.
left_alias = self._generate_from_original_query(
orig_query, leftmost_mapper,
leftmost_attr, entity.mapper
leftmost_attr, leftmost_relationship,
entity.mapper
)
# generate another Query that will join the
@@ -752,11 +753,12 @@ class SubqueryLoader(AbstractRelationshipLoader):
leftmost_mapper._columntoproperty[c].class_attribute
for c in leftmost_cols
]
return subq_mapper, leftmost_mapper, leftmost_attr
return subq_mapper, leftmost_mapper, leftmost_attr, leftmost_prop
def _generate_from_original_query(self,
orig_query, leftmost_mapper,
leftmost_attr, entity_mapper
leftmost_attr, leftmost_relationship,
entity_mapper
):
# reformat the original query
# to look only for significant columns
@@ -767,8 +769,22 @@ class SubqueryLoader(AbstractRelationshipLoader):
if not q._from_obj and entity_mapper.isa(leftmost_mapper):
q._set_select_from([entity_mapper], False)
target_cols = q._adapt_col_list(leftmost_attr)
# select from the identity columns of the outer
q._set_entities(q._adapt_col_list(leftmost_attr))
q._set_entities(target_cols)
distinct_target_key = leftmost_relationship.distinct_target_key
if distinct_target_key is True:
q._distinct = True
elif distinct_target_key is None:
# if target_cols refer to a non-primary key or only
# part of a composite primary key, set the q as distinct
for t in set(c.table for c in target_cols):
if not set(target_cols).issuperset(t.primary_key):
q._distinct = True
break
if q._order_by is False:
q._order_by = leftmost_mapper.order_by
+194
View File
@@ -1563,3 +1563,197 @@ class CyclicalInheritingEagerTestTwo(fixtures.DeclarativeMappedTest,
d = session.query(Director).options(subqueryload('*')).first()
assert len(list(session)) == 3
class SubqueryloadDistinctTest(fixtures.DeclarativeMappedTest,
testing.AssertsCompiledSQL):
__dialect__ = 'default'
run_inserts = 'once'
run_deletes = None
@classmethod
def setup_classes(cls):
Base = cls.DeclarativeBasic
class Director(Base):
__tablename__ = 'director'
id = Column(Integer, primary_key=True,
test_needs_autoincrement=True)
name = Column(String(50))
class DirectorPhoto(Base):
__tablename__ = 'director_photo'
id = Column(Integer, primary_key=True,
test_needs_autoincrement=True)
path = Column(String(255))
director_id = Column(Integer, ForeignKey('director.id'))
director = relationship(Director, backref="photos")
class Movie(Base):
__tablename__ = 'movie'
id = Column(Integer, primary_key=True, test_needs_autoincrement=True)
director_id = Column(Integer, ForeignKey('director.id'))
director = relationship(Director, backref="movies")
title = Column(String(50))
credits = relationship("Credit", backref="movie")
class Credit(Base):
__tablename__ = 'credit'
id = Column(Integer, primary_key=True, test_needs_autoincrement=True)
movie_id = Column(Integer, ForeignKey('movie.id'))
@classmethod
def insert_data(cls):
Movie = cls.classes.Movie
Director = cls.classes.Director
DirectorPhoto = cls.classes.DirectorPhoto
Credit = cls.classes.Credit
d = Director(name='Woody Allen')
d.photos = [DirectorPhoto(path='/1.jpg'),
DirectorPhoto(path='/2.jpg')]
d.movies = [Movie(title='Manhattan', credits=[Credit(), Credit()]),
Movie(title='Sweet and Lowdown', credits=[Credit()])]
sess = create_session()
sess.add_all([d])
sess.flush()
def test_distinct_strategy_opt_m2o(self):
self._run_test_m2o(True, None)
self._run_test_m2o(False, None)
def test_distinct_unrelated_opt_m2o(self):
self._run_test_m2o(None, True)
self._run_test_m2o(None, False)
def _run_test_m2o(self,
director_strategy_level,
photo_strategy_level):
# test where the innermost is m2o, e.g.
# Movie->director
Movie = self.classes.Movie
Director = self.classes.Director
Movie.director.property.distinct_target_key = director_strategy_level
Director.photos.property.distinct_target_key = photo_strategy_level
# the DISTINCT is controlled by
# only the Movie->director relationship, *not* the
# Director.photos
expect_distinct = director_strategy_level in (True, None)
s = create_session()
q = (
s.query(Movie)
.options(
subqueryload(Movie.director)
.subqueryload(Director.photos)
)
)
ctx = q._compile_context()
q2 = ctx.attributes[
('subquery', (inspect(Movie), inspect(Movie).attrs.director))
]
self.assert_compile(
q2,
'SELECT director.id AS director_id, '
'director.name AS director_name, '
'anon_1.movie_director_id AS anon_1_movie_director_id '
'FROM (SELECT%s movie.director_id AS movie_director_id '
'FROM movie) AS anon_1 '
'JOIN director ON director.id = anon_1.movie_director_id '
'ORDER BY anon_1.movie_director_id' % (
" DISTINCT" if expect_distinct else "")
)
ctx2 = q2._compile_context()
result = s.execute(q2)
rows = result.fetchall()
if expect_distinct:
eq_(rows, [
(1, 'Woody Allen', 1),
])
else:
eq_(rows, [
(1, 'Woody Allen', 1), (1, 'Woody Allen', 1),
])
q3 = ctx2.attributes[
('subquery', (inspect(Director), inspect(Director).attrs.photos))
]
self.assert_compile(
q3,
'SELECT director_photo.id AS director_photo_id, '
'director_photo.path AS director_photo_path, '
'director_photo.director_id AS director_photo_director_id, '
'director_1.id AS director_1_id '
'FROM (SELECT%s movie.director_id AS movie_director_id '
'FROM movie) AS anon_1 '
'JOIN director AS director_1 ON director_1.id = anon_1.movie_director_id '
'JOIN director_photo ON director_1.id = director_photo.director_id '
'ORDER BY director_1.id' % (
" DISTINCT" if expect_distinct else "")
)
result = s.execute(q3)
rows = result.fetchall()
if expect_distinct:
eq_(rows, [
(1, u'/1.jpg', 1, 1),
(2, u'/2.jpg', 1, 1),
])
else:
eq_(rows, [
(1, u'/1.jpg', 1, 1),
(2, u'/2.jpg', 1, 1),
(1, u'/1.jpg', 1, 1),
(2, u'/2.jpg', 1, 1),
])
movies = q.all()
# check number of persistent objects in session
eq_(len(list(s)), 5)
def test_cant_do_distinct_in_joins(self):
"""the DISTINCT feature here works when the m2o is in the innermost
mapper, but when we are just joining along relationships outside
of that, we can still have dupes, and there's no solution to that.
"""
Movie = self.classes.Movie
Credit = self.classes.Credit
s = create_session()
q = (
s.query(Credit)
.options(
subqueryload(Credit.movie)
.subqueryload(Movie.director)
)
)
ctx = q._compile_context()
q2 = ctx.attributes[
('subquery', (inspect(Credit), Credit.movie.property))
]
ctx2 = q2._compile_context()
q3 = ctx2.attributes[
('subquery', (inspect(Movie), Movie.director.property))
]
result = s.execute(q3)
eq_(
result.fetchall(),
[
(1, 'Woody Allen', 1), (1, 'Woody Allen', 1),
]
)