mirror of
https://github.com/python/cpython.git
synced 2026-05-06 04:37:33 -04:00
gh-81074: Allow non-ASCII addr_spec in email.headerregistry.Address (#122477)
The email.headerregistry.Address constructor raised an error if addr_spec contained a non-ASCII character. (But it fully supports non-ASCII in the separate username and domain args.) This change removes the error for a non-ASCII addr_spec, as well as the Defect that triggered it. In the Unicode era, non-ASCII is not a defect, though it is an error when an attempt is made to serialize it to ASCII. The serialization issue was handled in #122540.
This commit is contained in:
@@ -1503,11 +1503,6 @@ def get_local_part(value):
|
|||||||
local_part.defects.append(errors.ObsoleteHeaderDefect(
|
local_part.defects.append(errors.ObsoleteHeaderDefect(
|
||||||
"local-part is not a dot-atom (contains CFWS)"))
|
"local-part is not a dot-atom (contains CFWS)"))
|
||||||
local_part[0] = obs_local_part
|
local_part[0] = obs_local_part
|
||||||
try:
|
|
||||||
local_part.value.encode('ascii')
|
|
||||||
except UnicodeEncodeError:
|
|
||||||
local_part.defects.append(errors.NonASCIILocalPartDefect(
|
|
||||||
"local-part contains non-ASCII characters)"))
|
|
||||||
return local_part, value
|
return local_part, value
|
||||||
|
|
||||||
def get_obs_local_part(value):
|
def get_obs_local_part(value):
|
||||||
|
|||||||
+3
-3
@@ -109,9 +109,9 @@ class ObsoleteHeaderDefect(HeaderDefect):
|
|||||||
"""Header uses syntax declared obsolete by RFC 5322"""
|
"""Header uses syntax declared obsolete by RFC 5322"""
|
||||||
|
|
||||||
class NonASCIILocalPartDefect(HeaderDefect):
|
class NonASCIILocalPartDefect(HeaderDefect):
|
||||||
"""local_part contains non-ASCII characters"""
|
"""Unused. Note: this error is deprecated and may be removed in the future."""
|
||||||
# This defect only occurs during unicode parsing, not when
|
# RFC 6532 permits a non-ASCII local-part. _header_value_parser previously
|
||||||
# parsing messages decoded from binary.
|
# treated this as a parse-time defect (when parsing Unicode, but not bytes).
|
||||||
|
|
||||||
class InvalidDateDefect(HeaderDefect):
|
class InvalidDateDefect(HeaderDefect):
|
||||||
"""Header has unparsable or invalid date"""
|
"""Header has unparsable or invalid date"""
|
||||||
|
|||||||
@@ -1235,17 +1235,6 @@ class TestParser(TestParserMixin, TestEmailBase):
|
|||||||
'@example.com')
|
'@example.com')
|
||||||
self.assertEqual(local_part.local_part, r'\example\\ example')
|
self.assertEqual(local_part.local_part, r'\example\\ example')
|
||||||
|
|
||||||
def test_get_local_part_unicode_defect(self):
|
|
||||||
# Currently this only happens when parsing unicode, not when parsing
|
|
||||||
# stuff that was originally binary.
|
|
||||||
local_part = self._test_get_x(parser.get_local_part,
|
|
||||||
'exámple@example.com',
|
|
||||||
'exámple',
|
|
||||||
'exámple',
|
|
||||||
[errors.NonASCIILocalPartDefect],
|
|
||||||
'@example.com')
|
|
||||||
self.assertEqual(local_part.local_part, 'exámple')
|
|
||||||
|
|
||||||
# get_dtext
|
# get_dtext
|
||||||
|
|
||||||
def test_get_dtext_only(self):
|
def test_get_dtext_only(self):
|
||||||
|
|||||||
@@ -1543,17 +1543,19 @@ class TestAddressAndGroup(TestEmailBase):
|
|||||||
self.assertEqual(str(a), '"Sara J." <"bad name"@example.com>')
|
self.assertEqual(str(a), '"Sara J." <"bad name"@example.com>')
|
||||||
|
|
||||||
def test_il8n(self):
|
def test_il8n(self):
|
||||||
a = Address('Éric', 'wok', 'exàmple.com')
|
a = Address('Éric', 'wők', 'exàmple.com')
|
||||||
self.assertEqual(a.display_name, 'Éric')
|
self.assertEqual(a.display_name, 'Éric')
|
||||||
self.assertEqual(a.username, 'wok')
|
self.assertEqual(a.username, 'wők')
|
||||||
self.assertEqual(a.domain, 'exàmple.com')
|
self.assertEqual(a.domain, 'exàmple.com')
|
||||||
self.assertEqual(a.addr_spec, 'wok@exàmple.com')
|
self.assertEqual(a.addr_spec, 'wők@exàmple.com')
|
||||||
self.assertEqual(str(a), 'Éric <wok@exàmple.com>')
|
self.assertEqual(str(a), 'Éric <wők@exàmple.com>')
|
||||||
|
|
||||||
# XXX: there is an API design issue that needs to be solved here.
|
def test_i18n_in_addr_spec(self):
|
||||||
#def test_non_ascii_username_raises(self):
|
a = Address(addr_spec='wők@exàmple.com')
|
||||||
# with self.assertRaises(ValueError):
|
self.assertEqual(a.username, 'wők')
|
||||||
# Address('foo', 'wők', 'example.com')
|
self.assertEqual(a.domain, 'exàmple.com')
|
||||||
|
self.assertEqual(a.addr_spec, 'wők@exàmple.com')
|
||||||
|
self.assertEqual(str(a), 'wők@exàmple.com')
|
||||||
|
|
||||||
def test_crlf_in_constructor_args_raises(self):
|
def test_crlf_in_constructor_args_raises(self):
|
||||||
cases = (
|
cases = (
|
||||||
@@ -1574,10 +1576,6 @@ class TestAddressAndGroup(TestEmailBase):
|
|||||||
with self.subTest(kwargs=kwargs), self.assertRaisesRegex(ValueError, "invalid arguments"):
|
with self.subTest(kwargs=kwargs), self.assertRaisesRegex(ValueError, "invalid arguments"):
|
||||||
Address(**kwargs)
|
Address(**kwargs)
|
||||||
|
|
||||||
def test_non_ascii_username_in_addr_spec_raises(self):
|
|
||||||
with self.assertRaises(ValueError):
|
|
||||||
Address('foo', addr_spec='wők@example.com')
|
|
||||||
|
|
||||||
def test_address_addr_spec_and_username_raises(self):
|
def test_address_addr_spec_and_username_raises(self):
|
||||||
with self.assertRaises(TypeError):
|
with self.assertRaises(TypeError):
|
||||||
Address('foo', username='bing', addr_spec='bar@baz')
|
Address('foo', username='bing', addr_spec='bar@baz')
|
||||||
|
|||||||
@@ -0,0 +1,8 @@
|
|||||||
|
The :mod:`email` module no longer treats email addresses with non-ASCII
|
||||||
|
characters as defects when parsing a Unicode string or in the ``addr_spec``
|
||||||
|
parameter to :class:`email.headerregistry.Address`. :rfc:`6532` permits such
|
||||||
|
addresses, and they were already supported when parsing bytes and in the Address
|
||||||
|
``username`` parameter.
|
||||||
|
|
||||||
|
The (undocumented) :exc:`!email.errors.NonASCIILocalPartDefect` is no longer
|
||||||
|
used and should be considered deprecated.
|
||||||
Reference in New Issue
Block a user