mirror of
https://github.com/python/cpython.git
synced 2026-05-06 04:37:33 -04:00
gh-67041: Allow to distinguish between empty and not defined URI components (GH-123305)
Changes in the urllib.parse module: * Add option missing_as_none in urlparse(), urlsplit() and urldefrag(). If it is true, represent not defined components as None instead of an empty string. * Add option keep_empty in urlunparse() and urlunsplit(). If it is true, keep empty non-None components in the resulting string.
This commit is contained in:
+136
-76
@@ -50,12 +50,16 @@ URL Parsing
|
||||
The URL parsing functions focus on splitting a URL string into its components,
|
||||
or on combining URL components into a URL string.
|
||||
|
||||
.. function:: urlparse(urlstring, scheme='', allow_fragments=True)
|
||||
.. function:: urlparse(urlstring, scheme=None, allow_fragments=True, *, missing_as_none=False)
|
||||
|
||||
Parse a URL into six components, returning a 6-item :term:`named tuple`. This
|
||||
corresponds to the general structure of a URL:
|
||||
``scheme://netloc/path;parameters?query#fragment``.
|
||||
Each tuple item is a string, possibly empty. The components are not broken up
|
||||
Each tuple item is a string, possibly empty, or ``None`` if
|
||||
*missing_as_none* is true.
|
||||
Components that are not defined are represented as an empty string (by default) or
|
||||
``None`` if *missing_as_none* is true.
|
||||
The components are not broken up
|
||||
into smaller parts (for example, the network location is a single string), and %
|
||||
escapes are not expanded. The delimiters as shown above are not part of the
|
||||
result, except for a leading slash in the *path* component, which is retained if
|
||||
@@ -84,6 +88,12 @@ or on combining URL components into a URL string.
|
||||
80
|
||||
>>> o._replace(fragment="").geturl()
|
||||
'http://docs.python.org:80/3/library/urllib.parse.html?highlight=params'
|
||||
>>> urlparse("http://docs.python.org?")
|
||||
ParseResult(scheme='http', netloc='docs.python.org',
|
||||
path='', params='', query='', fragment='')
|
||||
>>> urlparse("http://docs.python.org?", missing_as_none=True)
|
||||
ParseResult(scheme='http', netloc='docs.python.org',
|
||||
path='', params=None, query='', fragment=None)
|
||||
|
||||
Following the syntax specifications in :rfc:`1808`, urlparse recognizes
|
||||
a netloc only if it is properly introduced by '//'. Otherwise the
|
||||
@@ -101,47 +111,53 @@ or on combining URL components into a URL string.
|
||||
ParseResult(scheme='', netloc='', path='www.cwi.nl/%7Eguido/Python.html',
|
||||
params='', query='', fragment='')
|
||||
>>> urlparse('help/Python.html')
|
||||
ParseResult(scheme='', netloc='', path='help/Python.html', params='',
|
||||
query='', fragment='')
|
||||
ParseResult(scheme='', netloc='', path='help/Python.html',
|
||||
params='', query='', fragment='')
|
||||
>>> urlparse('help/Python.html', missing_as_none=True)
|
||||
ParseResult(scheme=None, netloc=None, path='help/Python.html',
|
||||
params=None, query=None, fragment=None)
|
||||
|
||||
The *scheme* argument gives the default addressing scheme, to be
|
||||
used only if the URL does not specify one. It should be the same type
|
||||
(text or bytes) as *urlstring*, except that the default value ``''`` is
|
||||
(text or bytes) as *urlstring* or ``None``, except that ``''`` is
|
||||
always allowed, and is automatically converted to ``b''`` if appropriate.
|
||||
|
||||
If the *allow_fragments* argument is false, fragment identifiers are not
|
||||
recognized. Instead, they are parsed as part of the path, parameters
|
||||
or query component, and :attr:`fragment` is set to the empty string in
|
||||
the return value.
|
||||
or query component, and :attr:`fragment` is set to ``None`` or the empty
|
||||
string (depending on the value of *missing_as_none*) in the return value.
|
||||
|
||||
The return value is a :term:`named tuple`, which means that its items can
|
||||
be accessed by index or as named attributes, which are:
|
||||
|
||||
+------------------+-------+-------------------------+------------------------+
|
||||
| Attribute | Index | Value | Value if not present |
|
||||
+==================+=======+=========================+========================+
|
||||
| :attr:`scheme` | 0 | URL scheme specifier | *scheme* parameter |
|
||||
+------------------+-------+-------------------------+------------------------+
|
||||
| :attr:`netloc` | 1 | Network location part | empty string |
|
||||
+------------------+-------+-------------------------+------------------------+
|
||||
| :attr:`path` | 2 | Hierarchical path | empty string |
|
||||
+------------------+-------+-------------------------+------------------------+
|
||||
| :attr:`params` | 3 | Parameters for last | empty string |
|
||||
| | | path element | |
|
||||
+------------------+-------+-------------------------+------------------------+
|
||||
| :attr:`query` | 4 | Query component | empty string |
|
||||
+------------------+-------+-------------------------+------------------------+
|
||||
| :attr:`fragment` | 5 | Fragment identifier | empty string |
|
||||
+------------------+-------+-------------------------+------------------------+
|
||||
| :attr:`username` | | User name | :const:`None` |
|
||||
+------------------+-------+-------------------------+------------------------+
|
||||
| :attr:`password` | | Password | :const:`None` |
|
||||
+------------------+-------+-------------------------+------------------------+
|
||||
| :attr:`hostname` | | Host name (lower case) | :const:`None` |
|
||||
+------------------+-------+-------------------------+------------------------+
|
||||
| :attr:`port` | | Port number as integer, | :const:`None` |
|
||||
| | | if present | |
|
||||
+------------------+-------+-------------------------+------------------------+
|
||||
+------------------+-------+-------------------------+-------------------------------+
|
||||
| Attribute | Index | Value | Value if not present |
|
||||
+==================+=======+=========================+===============================+
|
||||
| :attr:`scheme` | 0 | URL scheme specifier | *scheme* parameter or |
|
||||
| | | | empty string [1]_ |
|
||||
+------------------+-------+-------------------------+-------------------------------+
|
||||
| :attr:`netloc` | 1 | Network location part | ``None`` or empty string [1]_ |
|
||||
+------------------+-------+-------------------------+-------------------------------+
|
||||
| :attr:`path` | 2 | Hierarchical path | empty string |
|
||||
+------------------+-------+-------------------------+-------------------------------+
|
||||
| :attr:`params` | 3 | Parameters for last | ``None`` or empty string [1]_ |
|
||||
| | | path element | |
|
||||
+------------------+-------+-------------------------+-------------------------------+
|
||||
| :attr:`query` | 4 | Query component | ``None`` or empty string [1]_ |
|
||||
+------------------+-------+-------------------------+-------------------------------+
|
||||
| :attr:`fragment` | 5 | Fragment identifier | ``None`` or empty string [1]_ |
|
||||
+------------------+-------+-------------------------+-------------------------------+
|
||||
| :attr:`username` | | User name | ``None`` |
|
||||
+------------------+-------+-------------------------+-------------------------------+
|
||||
| :attr:`password` | | Password | ``None`` |
|
||||
+------------------+-------+-------------------------+-------------------------------+
|
||||
| :attr:`hostname` | | Host name (lower case) | ``None`` |
|
||||
+------------------+-------+-------------------------+-------------------------------+
|
||||
| :attr:`port` | | Port number as integer, | ``None`` |
|
||||
| | | if present | |
|
||||
+------------------+-------+-------------------------+-------------------------------+
|
||||
|
||||
.. [1] Depending on the value of the *missing_as_none* argument.
|
||||
|
||||
Reading the :attr:`port` attribute will raise a :exc:`ValueError` if
|
||||
an invalid port is specified in the URL. See section
|
||||
@@ -187,12 +203,15 @@ or on combining URL components into a URL string.
|
||||
|
||||
.. versionchanged:: 3.6
|
||||
Out-of-range port numbers now raise :exc:`ValueError`, instead of
|
||||
returning :const:`None`.
|
||||
returning ``None``.
|
||||
|
||||
.. versionchanged:: 3.8
|
||||
Characters that affect netloc parsing under NFKC normalization will
|
||||
now raise :exc:`ValueError`.
|
||||
|
||||
.. versionchanged:: next
|
||||
Added the *missing_as_none* parameter.
|
||||
|
||||
|
||||
.. function:: parse_qs(qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace', max_num_fields=None, separator='&')
|
||||
|
||||
@@ -288,15 +307,27 @@ or on combining URL components into a URL string.
|
||||
|
||||
|
||||
.. function:: urlunparse(parts)
|
||||
urlunparse(parts, *, keep_empty)
|
||||
|
||||
Construct a URL from a tuple as returned by ``urlparse()``. The *parts*
|
||||
argument can be any six-item iterable. This may result in a slightly
|
||||
different, but equivalent URL, if the URL that was parsed originally had
|
||||
unnecessary delimiters (for example, a ``?`` with an empty query; the RFC
|
||||
states that these are equivalent).
|
||||
argument can be any six-item iterable.
|
||||
|
||||
This may result in a slightly different, but equivalent URL, if the
|
||||
URL that was parsed originally had unnecessary delimiters (for example,
|
||||
a ``?`` with an empty query; the RFC states that these are equivalent).
|
||||
|
||||
If *keep_empty* is true, empty strings are kept in the result (for example,
|
||||
a ``?`` for an empty query), only ``None`` components are omitted.
|
||||
This allows rebuilding a URL that was parsed with the option
|
||||
``missing_as_none=True``.
|
||||
By default, *keep_empty* is true if *parts* is the result of the
|
||||
:func:`urlparse` call with ``missing_as_none=True``.
|
||||
|
||||
.. versionchanged:: next
|
||||
Added the *keep_empty* parameter.
|
||||
|
||||
|
||||
.. function:: urlsplit(urlstring, scheme='', allow_fragments=True)
|
||||
.. function:: urlsplit(urlstring, scheme=None, allow_fragments=True, *, missing_as_none=False)
|
||||
|
||||
This is similar to :func:`urlparse`, but does not split the params from the URL.
|
||||
This should generally be used instead of :func:`urlparse` if the more recent URL
|
||||
@@ -310,28 +341,31 @@ or on combining URL components into a URL string.
|
||||
The return value is a :term:`named tuple`, its items can be accessed by index
|
||||
or as named attributes:
|
||||
|
||||
+------------------+-------+-------------------------+----------------------+
|
||||
| Attribute | Index | Value | Value if not present |
|
||||
+==================+=======+=========================+======================+
|
||||
| :attr:`scheme` | 0 | URL scheme specifier | *scheme* parameter |
|
||||
+------------------+-------+-------------------------+----------------------+
|
||||
| :attr:`netloc` | 1 | Network location part | empty string |
|
||||
+------------------+-------+-------------------------+----------------------+
|
||||
| :attr:`path` | 2 | Hierarchical path | empty string |
|
||||
+------------------+-------+-------------------------+----------------------+
|
||||
| :attr:`query` | 3 | Query component | empty string |
|
||||
+------------------+-------+-------------------------+----------------------+
|
||||
| :attr:`fragment` | 4 | Fragment identifier | empty string |
|
||||
+------------------+-------+-------------------------+----------------------+
|
||||
| :attr:`username` | | User name | :const:`None` |
|
||||
+------------------+-------+-------------------------+----------------------+
|
||||
| :attr:`password` | | Password | :const:`None` |
|
||||
+------------------+-------+-------------------------+----------------------+
|
||||
| :attr:`hostname` | | Host name (lower case) | :const:`None` |
|
||||
+------------------+-------+-------------------------+----------------------+
|
||||
| :attr:`port` | | Port number as integer, | :const:`None` |
|
||||
| | | if present | |
|
||||
+------------------+-------+-------------------------+----------------------+
|
||||
+------------------+-------+-------------------------+-------------------------------+
|
||||
| Attribute | Index | Value | Value if not present |
|
||||
+==================+=======+=========================+===============================+
|
||||
| :attr:`scheme` | 0 | URL scheme specifier | *scheme* parameter or |
|
||||
| | | | empty string [1]_ |
|
||||
+------------------+-------+-------------------------+-------------------------------+
|
||||
| :attr:`netloc` | 1 | Network location part | ``None`` or empty string [2]_ |
|
||||
+------------------+-------+-------------------------+-------------------------------+
|
||||
| :attr:`path` | 2 | Hierarchical path | empty string |
|
||||
+------------------+-------+-------------------------+-------------------------------+
|
||||
| :attr:`query` | 3 | Query component | ``None`` or empty string [2]_ |
|
||||
+------------------+-------+-------------------------+-------------------------------+
|
||||
| :attr:`fragment` | 4 | Fragment identifier | ``None`` or empty string [2]_ |
|
||||
+------------------+-------+-------------------------+-------------------------------+
|
||||
| :attr:`username` | | User name | ``None`` |
|
||||
+------------------+-------+-------------------------+-------------------------------+
|
||||
| :attr:`password` | | Password | ``None`` |
|
||||
+------------------+-------+-------------------------+-------------------------------+
|
||||
| :attr:`hostname` | | Host name (lower case) | ``None`` |
|
||||
+------------------+-------+-------------------------+-------------------------------+
|
||||
| :attr:`port` | | Port number as integer, | ``None`` |
|
||||
| | | if present | |
|
||||
+------------------+-------+-------------------------+-------------------------------+
|
||||
|
||||
.. [2] Depending on the value of the *missing_as_none* argument.
|
||||
|
||||
Reading the :attr:`port` attribute will raise a :exc:`ValueError` if
|
||||
an invalid port is specified in the URL. See section
|
||||
@@ -356,7 +390,7 @@ or on combining URL components into a URL string.
|
||||
|
||||
.. versionchanged:: 3.6
|
||||
Out-of-range port numbers now raise :exc:`ValueError`, instead of
|
||||
returning :const:`None`.
|
||||
returning ``None``.
|
||||
|
||||
.. versionchanged:: 3.8
|
||||
Characters that affect netloc parsing under NFKC normalization will
|
||||
@@ -368,15 +402,31 @@ or on combining URL components into a URL string.
|
||||
.. versionchanged:: 3.12
|
||||
Leading WHATWG C0 control and space characters are stripped from the URL.
|
||||
|
||||
.. versionchanged:: next
|
||||
Added the *missing_as_none* parameter.
|
||||
|
||||
.. _WHATWG spec: https://url.spec.whatwg.org/#concept-basic-url-parser
|
||||
|
||||
.. function:: urlunsplit(parts)
|
||||
urlunsplit(parts, *, keep_empty)
|
||||
|
||||
Combine the elements of a tuple as returned by :func:`urlsplit` into a
|
||||
complete URL as a string. The *parts* argument can be any five-item
|
||||
iterable. This may result in a slightly different, but equivalent URL, if the
|
||||
URL that was parsed originally had unnecessary delimiters (for example, a ?
|
||||
with an empty query; the RFC states that these are equivalent).
|
||||
iterable.
|
||||
|
||||
This may result in a slightly different, but equivalent URL, if the
|
||||
URL that was parsed originally had unnecessary delimiters (for example,
|
||||
a ``?`` with an empty query; the RFC states that these are equivalent).
|
||||
|
||||
If *keep_empty* is true, empty strings are kept in the result (for example,
|
||||
a ``?`` for an empty query), only ``None`` components are omitted.
|
||||
This allows rebuilding a URL that was parsed with the option
|
||||
``missing_as_none=True``.
|
||||
By default, *keep_empty* is true if *parts* is the result of the
|
||||
:func:`urlsplit` call with ``missing_as_none=True``.
|
||||
|
||||
.. versionchanged:: next
|
||||
Added the *keep_empty* parameter.
|
||||
|
||||
|
||||
.. function:: urljoin(base, url, allow_fragments=True)
|
||||
@@ -422,23 +472,25 @@ or on combining URL components into a URL string.
|
||||
Behavior updated to match the semantics defined in :rfc:`3986`.
|
||||
|
||||
|
||||
.. function:: urldefrag(url)
|
||||
.. function:: urldefrag(url, *, missing_as_none=False)
|
||||
|
||||
If *url* contains a fragment identifier, return a modified version of *url*
|
||||
with no fragment identifier, and the fragment identifier as a separate
|
||||
string. If there is no fragment identifier in *url*, return *url* unmodified
|
||||
and an empty string.
|
||||
and an empty string (by default) or ``None`` if *missing_as_none* is true.
|
||||
|
||||
The return value is a :term:`named tuple`, its items can be accessed by index
|
||||
or as named attributes:
|
||||
|
||||
+------------------+-------+-------------------------+----------------------+
|
||||
| Attribute | Index | Value | Value if not present |
|
||||
+==================+=======+=========================+======================+
|
||||
| :attr:`url` | 0 | URL with no fragment | empty string |
|
||||
+------------------+-------+-------------------------+----------------------+
|
||||
| :attr:`fragment` | 1 | Fragment identifier | empty string |
|
||||
+------------------+-------+-------------------------+----------------------+
|
||||
+------------------+-------+-------------------------+-------------------------------+
|
||||
| Attribute | Index | Value | Value if not present |
|
||||
+==================+=======+=========================+===============================+
|
||||
| :attr:`url` | 0 | URL with no fragment | empty string |
|
||||
+------------------+-------+-------------------------+-------------------------------+
|
||||
| :attr:`fragment` | 1 | Fragment identifier | ``None`` or empty string [3]_ |
|
||||
+------------------+-------+-------------------------+-------------------------------+
|
||||
|
||||
.. [3] Depending on the value of the *missing_as_none* argument.
|
||||
|
||||
See section :ref:`urlparse-result-object` for more information on the result
|
||||
object.
|
||||
@@ -446,6 +498,9 @@ or on combining URL components into a URL string.
|
||||
.. versionchanged:: 3.2
|
||||
Result is a structured object rather than a simple 2-tuple.
|
||||
|
||||
.. versionchanged:: next
|
||||
Added the *missing_as_none* parameter.
|
||||
|
||||
.. function:: unwrap(url)
|
||||
|
||||
Extract the url from a wrapped URL (that is, a string formatted as
|
||||
@@ -465,8 +520,9 @@ URLs elsewhere. Their purpose is for practical functionality rather than
|
||||
purity.
|
||||
|
||||
Instead of raising an exception on unusual input, they may instead return some
|
||||
component parts as empty strings. Or components may contain more than perhaps
|
||||
they should.
|
||||
component parts as empty strings or ``None`` (depending on the value of the
|
||||
*missing_as_none* argument).
|
||||
Or components may contain more than perhaps they should.
|
||||
|
||||
We recommend that users of these APIs where the values may be used anywhere
|
||||
with security implications code defensively. Do some verification within your
|
||||
@@ -542,7 +598,8 @@ previous section, as well as an additional method:
|
||||
Return the re-combined version of the original URL as a string. This may
|
||||
differ from the original URL in that the scheme may be normalized to lower
|
||||
case and empty components may be dropped. Specifically, empty parameters,
|
||||
queries, and fragment identifiers will be removed.
|
||||
queries, and fragment identifiers will be removed unless the URL was parsed
|
||||
with ``missing_as_none=True``.
|
||||
|
||||
For :func:`urldefrag` results, only empty fragment identifiers will be removed.
|
||||
For :func:`urlsplit` and :func:`urlparse` results, all noted changes will be
|
||||
@@ -559,6 +616,9 @@ previous section, as well as an additional method:
|
||||
>>> r2 = urlsplit(r1.geturl())
|
||||
>>> r2.geturl()
|
||||
'http://www.Python.org/doc/'
|
||||
>>> r3 = urlsplit(url, missing_as_none=True)
|
||||
>>> r3.geturl()
|
||||
'http://www.Python.org/doc/#'
|
||||
|
||||
|
||||
The following classes provide the implementations of the structured parse
|
||||
|
||||
@@ -836,6 +836,18 @@ unittest
|
||||
(Contributed by Garry Cairns in :gh:`134567`.)
|
||||
|
||||
|
||||
urllib.parse
|
||||
------------
|
||||
|
||||
* Add the *missing_as_none* parameter to :func:`~urllib.parse.urlparse`,
|
||||
:func:`~urllib.parse.urlsplit` and :func:`~urllib.parse.urldefrag` functions.
|
||||
Add the *keep_empty* parameter to :func:`~urllib.parse.urlunparse` and
|
||||
:func:`~urllib.parse.urlunsplit` functions.
|
||||
This makes it possible to distinguish between empty and undefined URI components
|
||||
and preserve empty components.
|
||||
(Contributed by Serhiy Storchaka in :gh:`67041`.)
|
||||
|
||||
|
||||
venv
|
||||
----
|
||||
|
||||
|
||||
+405
-171
@@ -1,7 +1,10 @@
|
||||
import copy
|
||||
import pickle
|
||||
import sys
|
||||
import unicodedata
|
||||
import unittest
|
||||
import urllib.parse
|
||||
from urllib.parse import urldefrag, urlparse, urlsplit, urlunparse, urlunsplit
|
||||
from test import support
|
||||
|
||||
RFC1808_BASE = "http://a/b/c/d;p?q#f"
|
||||
@@ -107,19 +110,46 @@ class UrlParseTestCase(unittest.TestCase):
|
||||
def checkRoundtrips(self, url, parsed, split, url2=None):
|
||||
if url2 is None:
|
||||
url2 = url
|
||||
result = urllib.parse.urlparse(url)
|
||||
self.checkRoundtrips1(url, parsed, split, missing_as_none=True)
|
||||
empty = url[:0]
|
||||
parsed = tuple(x or empty for x in parsed)
|
||||
split = tuple(x or empty for x in split)
|
||||
self.checkRoundtrips1(url, parsed, split, url2, missing_as_none=False)
|
||||
|
||||
result = urlparse(url, missing_as_none=True)
|
||||
self.assertEqual(urlunparse(result, keep_empty=False), url2)
|
||||
self.assertEqual(urlunparse(tuple(result), keep_empty=False), url2)
|
||||
result = urlparse(url, missing_as_none=False)
|
||||
with self.assertRaises(ValueError):
|
||||
urlunparse(result, keep_empty=True)
|
||||
urlunparse(tuple(result), keep_empty=True)
|
||||
|
||||
result = urlsplit(url, missing_as_none=True)
|
||||
self.assertEqual(urlunsplit(result, keep_empty=False), url2)
|
||||
self.assertEqual(urlunsplit(tuple(result), keep_empty=False), url2)
|
||||
result = urlsplit(url, missing_as_none=False)
|
||||
with self.assertRaises(ValueError):
|
||||
urlunsplit(result, keep_empty=True)
|
||||
urlunsplit(tuple(result), keep_empty=True)
|
||||
|
||||
def checkRoundtrips1(self, url, parsed, split, url2=None, *, missing_as_none):
|
||||
if url2 is None:
|
||||
url2 = url
|
||||
result = urlparse(url, missing_as_none=missing_as_none)
|
||||
self.assertSequenceEqual(result, parsed)
|
||||
t = (result.scheme, result.netloc, result.path,
|
||||
result.params, result.query, result.fragment)
|
||||
self.assertSequenceEqual(t, parsed)
|
||||
# put it back together and it should be the same
|
||||
result2 = urllib.parse.urlunparse(result)
|
||||
self.assertSequenceEqual(result2, url2)
|
||||
self.assertSequenceEqual(result2, result.geturl())
|
||||
result2 = urlunparse(result)
|
||||
self.assertEqual(result2, url2)
|
||||
self.assertEqual(result2, result.geturl())
|
||||
self.assertEqual(urlunparse(result, keep_empty=missing_as_none), url2)
|
||||
self.assertEqual(urlunparse(tuple(result), keep_empty=missing_as_none), result2)
|
||||
|
||||
# the result of geturl() is a fixpoint; we can always parse it
|
||||
# again to get the same result:
|
||||
result3 = urllib.parse.urlparse(result.geturl())
|
||||
result3 = urlparse(result.geturl(), missing_as_none=missing_as_none)
|
||||
self.assertEqual(result3.geturl(), result.geturl())
|
||||
self.assertSequenceEqual(result3, result)
|
||||
self.assertEqual(result3.scheme, result.scheme)
|
||||
@@ -134,17 +164,18 @@ class UrlParseTestCase(unittest.TestCase):
|
||||
self.assertEqual(result3.port, result.port)
|
||||
|
||||
# check the roundtrip using urlsplit() as well
|
||||
result = urllib.parse.urlsplit(url)
|
||||
result = urlsplit(url, missing_as_none=missing_as_none)
|
||||
self.assertSequenceEqual(result, split)
|
||||
t = (result.scheme, result.netloc, result.path,
|
||||
result.query, result.fragment)
|
||||
result.query, result.fragment)
|
||||
self.assertSequenceEqual(t, split)
|
||||
result2 = urllib.parse.urlunsplit(result)
|
||||
self.assertSequenceEqual(result2, url2)
|
||||
self.assertSequenceEqual(result2, result.geturl())
|
||||
result2 = urlunsplit(result)
|
||||
self.assertEqual(result2, url2)
|
||||
self.assertEqual(result2, result.geturl())
|
||||
self.assertEqual(urlunsplit(tuple(result), keep_empty=missing_as_none), result2)
|
||||
|
||||
# check the fixpoint property of re-parsing the result of geturl()
|
||||
result3 = urllib.parse.urlsplit(result.geturl())
|
||||
result3 = urlsplit(result.geturl(), missing_as_none=missing_as_none)
|
||||
self.assertEqual(result3.geturl(), result.geturl())
|
||||
self.assertSequenceEqual(result3, result)
|
||||
self.assertEqual(result3.scheme, result.scheme)
|
||||
@@ -177,94 +208,94 @@ class UrlParseTestCase(unittest.TestCase):
|
||||
@support.subTests('bytes', (False, True))
|
||||
@support.subTests('url,parsed,split', [
|
||||
('path/to/file',
|
||||
('', '', 'path/to/file', '', '', ''),
|
||||
('', '', 'path/to/file', '', '')),
|
||||
(None, None, 'path/to/file', None, None, None),
|
||||
(None, None, 'path/to/file', None, None)),
|
||||
('/path/to/file',
|
||||
('', '', '/path/to/file', '', '', ''),
|
||||
('', '', '/path/to/file', '', '')),
|
||||
(None, None, '/path/to/file', None, None, None),
|
||||
(None, None, '/path/to/file', None, None)),
|
||||
('//path/to/file',
|
||||
('', 'path', '/to/file', '', '', ''),
|
||||
('', 'path', '/to/file', '', '')),
|
||||
(None, 'path', '/to/file', None, None, None),
|
||||
(None, 'path', '/to/file', None, None)),
|
||||
('////path/to/file',
|
||||
('', '', '//path/to/file', '', '', ''),
|
||||
('', '', '//path/to/file', '', '')),
|
||||
(None, '', '//path/to/file', None, None, None),
|
||||
(None, '', '//path/to/file', None, None)),
|
||||
('/////path/to/file',
|
||||
('', '', '///path/to/file', '', '', ''),
|
||||
('', '', '///path/to/file', '', '')),
|
||||
(None, '', '///path/to/file', None, None, None),
|
||||
(None, '', '///path/to/file', None, None)),
|
||||
('scheme:path/to/file',
|
||||
('scheme', '', 'path/to/file', '', '', ''),
|
||||
('scheme', '', 'path/to/file', '', '')),
|
||||
('scheme', None, 'path/to/file', None, None, None),
|
||||
('scheme', None, 'path/to/file', None, None)),
|
||||
('scheme:/path/to/file',
|
||||
('scheme', '', '/path/to/file', '', '', ''),
|
||||
('scheme', '', '/path/to/file', '', '')),
|
||||
('scheme', None, '/path/to/file', None, None, None),
|
||||
('scheme', None, '/path/to/file', None, None)),
|
||||
('scheme://path/to/file',
|
||||
('scheme', 'path', '/to/file', '', '', ''),
|
||||
('scheme', 'path', '/to/file', '', '')),
|
||||
('scheme', 'path', '/to/file', None, None, None),
|
||||
('scheme', 'path', '/to/file', None, None)),
|
||||
('scheme:////path/to/file',
|
||||
('scheme', '', '//path/to/file', '', '', ''),
|
||||
('scheme', '', '//path/to/file', '', '')),
|
||||
('scheme', '', '//path/to/file', None, None, None),
|
||||
('scheme', '', '//path/to/file', None, None)),
|
||||
('scheme://///path/to/file',
|
||||
('scheme', '', '///path/to/file', '', '', ''),
|
||||
('scheme', '', '///path/to/file', '', '')),
|
||||
('scheme', '', '///path/to/file', None, None, None),
|
||||
('scheme', '', '///path/to/file', None, None)),
|
||||
('file:tmp/junk.txt',
|
||||
('file', '', 'tmp/junk.txt', '', '', ''),
|
||||
('file', '', 'tmp/junk.txt', '', '')),
|
||||
('file', None, 'tmp/junk.txt', None, None, None),
|
||||
('file', None, 'tmp/junk.txt', None, None)),
|
||||
('file:///tmp/junk.txt',
|
||||
('file', '', '/tmp/junk.txt', '', '', ''),
|
||||
('file', '', '/tmp/junk.txt', '', '')),
|
||||
('file', '', '/tmp/junk.txt', None, None, None),
|
||||
('file', '', '/tmp/junk.txt', None, None)),
|
||||
('file:////tmp/junk.txt',
|
||||
('file', '', '//tmp/junk.txt', '', '', ''),
|
||||
('file', '', '//tmp/junk.txt', '', '')),
|
||||
('file', '', '//tmp/junk.txt', None, None, None),
|
||||
('file', '', '//tmp/junk.txt', None, None)),
|
||||
('file://///tmp/junk.txt',
|
||||
('file', '', '///tmp/junk.txt', '', '', ''),
|
||||
('file', '', '///tmp/junk.txt', '', '')),
|
||||
('file', '', '///tmp/junk.txt', None, None, None),
|
||||
('file', '', '///tmp/junk.txt', None, None)),
|
||||
('http:tmp/junk.txt',
|
||||
('http', '', 'tmp/junk.txt', '', '', ''),
|
||||
('http', '', 'tmp/junk.txt', '', '')),
|
||||
('http', None, 'tmp/junk.txt', None, None, None),
|
||||
('http', None, 'tmp/junk.txt', None, None)),
|
||||
('http://example.com/tmp/junk.txt',
|
||||
('http', 'example.com', '/tmp/junk.txt', '', '', ''),
|
||||
('http', 'example.com', '/tmp/junk.txt', '', '')),
|
||||
('http', 'example.com', '/tmp/junk.txt', None, None, None),
|
||||
('http', 'example.com', '/tmp/junk.txt', None, None)),
|
||||
('http:///example.com/tmp/junk.txt',
|
||||
('http', '', '/example.com/tmp/junk.txt', '', '', ''),
|
||||
('http', '', '/example.com/tmp/junk.txt', '', '')),
|
||||
('http', '', '/example.com/tmp/junk.txt', None, None, None),
|
||||
('http', '', '/example.com/tmp/junk.txt', None, None)),
|
||||
('http:////example.com/tmp/junk.txt',
|
||||
('http', '', '//example.com/tmp/junk.txt', '', '', ''),
|
||||
('http', '', '//example.com/tmp/junk.txt', '', '')),
|
||||
('http', '', '//example.com/tmp/junk.txt', None, None, None),
|
||||
('http', '', '//example.com/tmp/junk.txt', None, None)),
|
||||
('imap://mail.python.org/mbox1',
|
||||
('imap', 'mail.python.org', '/mbox1', '', '', ''),
|
||||
('imap', 'mail.python.org', '/mbox1', '', '')),
|
||||
('imap', 'mail.python.org', '/mbox1', None, None, None),
|
||||
('imap', 'mail.python.org', '/mbox1', None, None)),
|
||||
('mms://wms.sys.hinet.net/cts/Drama/09006251100.asf',
|
||||
('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
|
||||
'', '', ''),
|
||||
None, None, None),
|
||||
('mms', 'wms.sys.hinet.net', '/cts/Drama/09006251100.asf',
|
||||
'', '')),
|
||||
None, None)),
|
||||
('nfs://server/path/to/file.txt',
|
||||
('nfs', 'server', '/path/to/file.txt', '', '', ''),
|
||||
('nfs', 'server', '/path/to/file.txt', '', '')),
|
||||
('nfs', 'server', '/path/to/file.txt', None, None, None),
|
||||
('nfs', 'server', '/path/to/file.txt', None, None)),
|
||||
('svn+ssh://svn.zope.org/repos/main/ZConfig/trunk/',
|
||||
('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
|
||||
'', '', ''),
|
||||
None, None, None),
|
||||
('svn+ssh', 'svn.zope.org', '/repos/main/ZConfig/trunk/',
|
||||
'', '')),
|
||||
None, None)),
|
||||
('git+ssh://git@github.com/user/project.git',
|
||||
('git+ssh', 'git@github.com','/user/project.git',
|
||||
'','',''),
|
||||
None,None,None),
|
||||
('git+ssh', 'git@github.com','/user/project.git',
|
||||
'', '')),
|
||||
None, None)),
|
||||
('itms-services://?action=download-manifest&url=https://example.com/app',
|
||||
('itms-services', '', '', '',
|
||||
'action=download-manifest&url=https://example.com/app', ''),
|
||||
('itms-services', '', '', None,
|
||||
'action=download-manifest&url=https://example.com/app', None),
|
||||
('itms-services', '', '',
|
||||
'action=download-manifest&url=https://example.com/app', '')),
|
||||
'action=download-manifest&url=https://example.com/app', None)),
|
||||
('+scheme:path/to/file',
|
||||
('', '', '+scheme:path/to/file', '', '', ''),
|
||||
('', '', '+scheme:path/to/file', '', '')),
|
||||
(None, None, '+scheme:path/to/file', None, None, None),
|
||||
(None, None, '+scheme:path/to/file', None, None)),
|
||||
('sch_me:path/to/file',
|
||||
('', '', 'sch_me:path/to/file', '', '', ''),
|
||||
('', '', 'sch_me:path/to/file', '', '')),
|
||||
(None, None, 'sch_me:path/to/file', None, None, None),
|
||||
(None, None, 'sch_me:path/to/file', None, None)),
|
||||
('schème:path/to/file',
|
||||
('', '', 'schème:path/to/file', '', '', ''),
|
||||
('', '', 'schème:path/to/file', '', '')),
|
||||
(None, None, 'schème:path/to/file', None, None, None),
|
||||
(None, None, 'schème:path/to/file', None, None)),
|
||||
])
|
||||
def test_roundtrips(self, bytes, url, parsed, split):
|
||||
if bytes:
|
||||
@@ -279,24 +310,24 @@ class UrlParseTestCase(unittest.TestCase):
|
||||
@support.subTests('url,url2,parsed,split', [
|
||||
('///path/to/file',
|
||||
'/path/to/file',
|
||||
('', '', '/path/to/file', '', '', ''),
|
||||
('', '', '/path/to/file', '', '')),
|
||||
(None, '', '/path/to/file', None, None, None),
|
||||
(None, '', '/path/to/file', None, None)),
|
||||
('scheme:///path/to/file',
|
||||
'scheme:/path/to/file',
|
||||
('scheme', '', '/path/to/file', '', '', ''),
|
||||
('scheme', '', '/path/to/file', '', '')),
|
||||
('scheme', '', '/path/to/file', None, None, None),
|
||||
('scheme', '', '/path/to/file', None, None)),
|
||||
('file:/tmp/junk.txt',
|
||||
'file:///tmp/junk.txt',
|
||||
('file', '', '/tmp/junk.txt', '', '', ''),
|
||||
('file', '', '/tmp/junk.txt', '', '')),
|
||||
('file', None, '/tmp/junk.txt', None, None, None),
|
||||
('file', None, '/tmp/junk.txt', None, None)),
|
||||
('http:/tmp/junk.txt',
|
||||
'http:///tmp/junk.txt',
|
||||
('http', '', '/tmp/junk.txt', '', '', ''),
|
||||
('http', '', '/tmp/junk.txt', '', '')),
|
||||
('http', None, '/tmp/junk.txt', None, None, None),
|
||||
('http', None, '/tmp/junk.txt', None, None)),
|
||||
('https:/tmp/junk.txt',
|
||||
'https:///tmp/junk.txt',
|
||||
('https', '', '/tmp/junk.txt', '', '', ''),
|
||||
('https', '', '/tmp/junk.txt', '', '')),
|
||||
('https', None, '/tmp/junk.txt', None, None, None),
|
||||
('https', None, '/tmp/junk.txt', None, None)),
|
||||
])
|
||||
def test_roundtrips_normalization(self, bytes, url, url2, parsed, split):
|
||||
if bytes:
|
||||
@@ -310,17 +341,17 @@ class UrlParseTestCase(unittest.TestCase):
|
||||
@support.subTests('scheme', ('http', 'https'))
|
||||
@support.subTests('url,parsed,split', [
|
||||
('://www.python.org',
|
||||
('www.python.org', '', '', '', ''),
|
||||
('www.python.org', '', '', '')),
|
||||
('www.python.org', '', None, None, None),
|
||||
('www.python.org', '', None, None)),
|
||||
('://www.python.org#abc',
|
||||
('www.python.org', '', '', '', 'abc'),
|
||||
('www.python.org', '', '', 'abc')),
|
||||
('www.python.org', '', None, None, 'abc'),
|
||||
('www.python.org', '', None, 'abc')),
|
||||
('://www.python.org?q=abc',
|
||||
('www.python.org', '', '', 'q=abc', ''),
|
||||
('www.python.org', '', 'q=abc', '')),
|
||||
('www.python.org', '', None, 'q=abc', None),
|
||||
('www.python.org', '', 'q=abc', None)),
|
||||
('://www.python.org/#abc',
|
||||
('www.python.org', '/', '', '', 'abc'),
|
||||
('www.python.org', '/', '', 'abc')),
|
||||
('www.python.org', '/', None, None, 'abc'),
|
||||
('www.python.org', '/', None, 'abc')),
|
||||
('://a/b/c/d;p?q#f',
|
||||
('a', '/b/c/d', 'p', 'q', 'f'),
|
||||
('a', '/b/c/d;p', 'q', 'f')),
|
||||
@@ -342,16 +373,21 @@ class UrlParseTestCase(unittest.TestCase):
|
||||
def checkJoin(self, base, relurl, expected, *, relroundtrip=True):
|
||||
with self.subTest(base=base, relurl=relurl):
|
||||
self.assertEqual(urllib.parse.urljoin(base, relurl), expected)
|
||||
baseb = base.encode('ascii')
|
||||
relurlb = relurl.encode('ascii')
|
||||
expectedb = expected.encode('ascii')
|
||||
baseb = str_encode(base)
|
||||
relurlb = str_encode(relurl)
|
||||
expectedb = str_encode(expected)
|
||||
self.assertEqual(urllib.parse.urljoin(baseb, relurlb), expectedb)
|
||||
|
||||
if relroundtrip:
|
||||
relurl = urllib.parse.urlunsplit(urllib.parse.urlsplit(relurl))
|
||||
self.assertEqual(urllib.parse.urljoin(base, relurl), expected)
|
||||
relurlb = urllib.parse.urlunsplit(urllib.parse.urlsplit(relurlb))
|
||||
self.assertEqual(urllib.parse.urljoin(baseb, relurlb), expectedb)
|
||||
relurl2 = urlunsplit(urlsplit(relurl))
|
||||
self.assertEqual(urllib.parse.urljoin(base, relurl2), expected)
|
||||
relurlb2 = urlunsplit(urlsplit(relurlb))
|
||||
self.assertEqual(urllib.parse.urljoin(baseb, relurlb2), expectedb)
|
||||
|
||||
relurl3 = urlunsplit(urlsplit(relurl, missing_as_none=True))
|
||||
self.assertEqual(urllib.parse.urljoin(base, relurl3), expected)
|
||||
relurlb3 = urlunsplit(urlsplit(relurlb, missing_as_none=True))
|
||||
self.assertEqual(urllib.parse.urljoin(baseb, relurlb3), expectedb)
|
||||
|
||||
@support.subTests('bytes', (False, True))
|
||||
@support.subTests('u', ['Python', './Python','x-newscheme://foo.com/stuff','x://y','x:/y','x:/','/',])
|
||||
@@ -387,7 +423,7 @@ class UrlParseTestCase(unittest.TestCase):
|
||||
self.checkJoin(RFC1808_BASE, '../../g', 'http://a/g')
|
||||
|
||||
# "abnormal" cases from RFC 1808:
|
||||
self.checkJoin(RFC1808_BASE, '', 'http://a/b/c/d;p?q#f')
|
||||
self.checkJoin(RFC1808_BASE, None, 'http://a/b/c/d;p?q#f')
|
||||
self.checkJoin(RFC1808_BASE, 'g.', 'http://a/b/c/g.')
|
||||
self.checkJoin(RFC1808_BASE, '.g', 'http://a/b/c/.g')
|
||||
self.checkJoin(RFC1808_BASE, 'g..', 'http://a/b/c/g..')
|
||||
@@ -411,8 +447,10 @@ class UrlParseTestCase(unittest.TestCase):
|
||||
|
||||
def test_RFC2368(self):
|
||||
# Issue 11467: path that starts with a number is not parsed correctly
|
||||
self.assertEqual(urllib.parse.urlparse('mailto:1337@example.org'),
|
||||
self.assertEqual(urlparse('mailto:1337@example.org'),
|
||||
('mailto', '', '1337@example.org', '', '', ''))
|
||||
self.assertEqual(urlparse('mailto:1337@example.org', missing_as_none=True),
|
||||
('mailto', None, '1337@example.org', None, None, None))
|
||||
|
||||
def test_RFC2396(self):
|
||||
# cases from RFC 2396
|
||||
@@ -741,18 +779,18 @@ class UrlParseTestCase(unittest.TestCase):
|
||||
@support.subTests('bytes', (False, True))
|
||||
@support.subTests('url,defrag,frag', [
|
||||
('http://python.org#frag', 'http://python.org', 'frag'),
|
||||
('http://python.org', 'http://python.org', ''),
|
||||
('http://python.org', 'http://python.org', None),
|
||||
('http://python.org/#frag', 'http://python.org/', 'frag'),
|
||||
('http://python.org/', 'http://python.org/', ''),
|
||||
('http://python.org/', 'http://python.org/', None),
|
||||
('http://python.org/?q#frag', 'http://python.org/?q', 'frag'),
|
||||
('http://python.org/?q', 'http://python.org/?q', ''),
|
||||
('http://python.org/?q', 'http://python.org/?q', None),
|
||||
('http://python.org/p#frag', 'http://python.org/p', 'frag'),
|
||||
('http://python.org/p?q', 'http://python.org/p?q', ''),
|
||||
('http://python.org/p?q', 'http://python.org/p?q', None),
|
||||
(RFC1808_BASE, 'http://a/b/c/d;p?q', 'f'),
|
||||
(RFC2396_BASE, 'http://a/b/c/d;p?q', ''),
|
||||
(RFC2396_BASE, 'http://a/b/c/d;p?q', None),
|
||||
('http://a/b/c;p?q#f', 'http://a/b/c;p?q', 'f'),
|
||||
('http://a/b/c;p?q#', 'http://a/b/c;p?q', ''),
|
||||
('http://a/b/c;p?q', 'http://a/b/c;p?q', ''),
|
||||
('http://a/b/c;p?q', 'http://a/b/c;p?q', None),
|
||||
('http://a/b/c;p?#f', 'http://a/b/c;p?', 'f'),
|
||||
('http://a/b/c;p#f', 'http://a/b/c;p', 'f'),
|
||||
('http://a/b/c;?q#f', 'http://a/b/c;?q', 'f'),
|
||||
@@ -764,14 +802,19 @@ class UrlParseTestCase(unittest.TestCase):
|
||||
('//a/b/c;p?q#f', '//a/b/c;p?q', 'f'),
|
||||
('://a/b/c;p?q#f', '://a/b/c;p?q', 'f'),
|
||||
])
|
||||
def test_urldefrag(self, bytes, url, defrag, frag):
|
||||
@support.subTests('missing_as_none', (False, True))
|
||||
def test_urldefrag(self, bytes, url, defrag, frag, missing_as_none):
|
||||
if bytes:
|
||||
url = str_encode(url)
|
||||
defrag = str_encode(defrag)
|
||||
frag = str_encode(frag)
|
||||
result = urllib.parse.urldefrag(url)
|
||||
hash = '#' if isinstance(url, str) else b'#'
|
||||
self.assertEqual(result.geturl(), url.rstrip(hash))
|
||||
result = urllib.parse.urldefrag(url, missing_as_none=missing_as_none)
|
||||
if not missing_as_none:
|
||||
hash = '#' if isinstance(url, str) else b'#'
|
||||
url = url.rstrip(hash)
|
||||
if frag is None:
|
||||
frag = url[:0]
|
||||
self.assertEqual(result.geturl(), url)
|
||||
self.assertEqual(result, (defrag, frag))
|
||||
self.assertEqual(result.url, defrag)
|
||||
self.assertEqual(result.fragment, frag)
|
||||
@@ -1001,26 +1044,27 @@ class UrlParseTestCase(unittest.TestCase):
|
||||
if not url.isascii():
|
||||
self.skipTest('non-ASCII bytes')
|
||||
url = url.encode("ascii")
|
||||
p = parse(url)
|
||||
self.assertEqual(p.scheme, b"" if bytes else "")
|
||||
p = parse(url, missing_as_none=True)
|
||||
self.assertIsNone(p.scheme)
|
||||
|
||||
def test_attributes_without_netloc(self):
|
||||
@support.subTests('missing_as_none', (False, True))
|
||||
def test_attributes_without_netloc(self, missing_as_none):
|
||||
# This example is straight from RFC 3261. It looks like it
|
||||
# should allow the username, hostname, and port to be filled
|
||||
# in, but doesn't. Since it's a URI and doesn't use the
|
||||
# scheme://netloc syntax, the netloc and related attributes
|
||||
# should be left empty.
|
||||
uri = "sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
|
||||
p = urllib.parse.urlsplit(uri)
|
||||
self.assertEqual(p.netloc, "")
|
||||
p = urllib.parse.urlsplit(uri, missing_as_none=missing_as_none)
|
||||
self.assertEqual(p.netloc, None if missing_as_none else "")
|
||||
self.assertEqual(p.username, None)
|
||||
self.assertEqual(p.password, None)
|
||||
self.assertEqual(p.hostname, None)
|
||||
self.assertEqual(p.port, None)
|
||||
self.assertEqual(p.geturl(), uri)
|
||||
|
||||
p = urllib.parse.urlparse(uri)
|
||||
self.assertEqual(p.netloc, "")
|
||||
p = urllib.parse.urlparse(uri, missing_as_none=missing_as_none)
|
||||
self.assertEqual(p.netloc, None if missing_as_none else "")
|
||||
self.assertEqual(p.username, None)
|
||||
self.assertEqual(p.password, None)
|
||||
self.assertEqual(p.hostname, None)
|
||||
@@ -1029,16 +1073,16 @@ class UrlParseTestCase(unittest.TestCase):
|
||||
|
||||
# You guessed it, repeating the test with bytes input
|
||||
uri = b"sip:alice@atlanta.com;maddr=239.255.255.1;ttl=15"
|
||||
p = urllib.parse.urlsplit(uri)
|
||||
self.assertEqual(p.netloc, b"")
|
||||
p = urllib.parse.urlsplit(uri, missing_as_none=missing_as_none)
|
||||
self.assertEqual(p.netloc, None if missing_as_none else b"")
|
||||
self.assertEqual(p.username, None)
|
||||
self.assertEqual(p.password, None)
|
||||
self.assertEqual(p.hostname, None)
|
||||
self.assertEqual(p.port, None)
|
||||
self.assertEqual(p.geturl(), uri)
|
||||
|
||||
p = urllib.parse.urlparse(uri)
|
||||
self.assertEqual(p.netloc, b"")
|
||||
p = urllib.parse.urlparse(uri, missing_as_none=missing_as_none)
|
||||
self.assertEqual(p.netloc, None if missing_as_none else b"")
|
||||
self.assertEqual(p.username, None)
|
||||
self.assertEqual(p.password, None)
|
||||
self.assertEqual(p.hostname, None)
|
||||
@@ -1052,67 +1096,86 @@ class UrlParseTestCase(unittest.TestCase):
|
||||
self.assertEqual(urllib.parse.urlparse(b"http://example.com?blahblah=/foo"),
|
||||
(b'http', b'example.com', b'', b'', b'blahblah=/foo', b''))
|
||||
|
||||
def test_withoutscheme(self):
|
||||
@support.subTests('missing_as_none', (False, True))
|
||||
def test_withoutscheme(self, missing_as_none):
|
||||
# Test urlparse without scheme
|
||||
# Issue 754016: urlparse goes wrong with IP:port without scheme
|
||||
# RFC 1808 specifies that netloc should start with //, urlparse expects
|
||||
# the same, otherwise it classifies the portion of url as path.
|
||||
self.assertEqual(urllib.parse.urlparse("path"),
|
||||
('','','path','','',''))
|
||||
self.assertEqual(urllib.parse.urlparse("//www.python.org:80"),
|
||||
('','www.python.org:80','','','',''))
|
||||
self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
|
||||
('http','www.python.org:80','','','',''))
|
||||
none = None if missing_as_none else ''
|
||||
self.assertEqual(urlparse("path", missing_as_none=missing_as_none),
|
||||
(none, none, 'path', none, none, none))
|
||||
self.assertEqual(urlparse("//www.python.org:80", missing_as_none=missing_as_none),
|
||||
(none, 'www.python.org:80', '', none, none, none))
|
||||
self.assertEqual(urlparse("http://www.python.org:80", missing_as_none=missing_as_none),
|
||||
('http', 'www.python.org:80', '', none, none, none))
|
||||
# Repeat for bytes input
|
||||
self.assertEqual(urllib.parse.urlparse(b"path"),
|
||||
(b'',b'',b'path',b'',b'',b''))
|
||||
self.assertEqual(urllib.parse.urlparse(b"//www.python.org:80"),
|
||||
(b'',b'www.python.org:80',b'',b'',b'',b''))
|
||||
self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
|
||||
(b'http',b'www.python.org:80',b'',b'',b'',b''))
|
||||
none = None if missing_as_none else b''
|
||||
self.assertEqual(urlparse(b"path", missing_as_none=missing_as_none),
|
||||
(none, none, b'path', none, none, none))
|
||||
self.assertEqual(urlparse(b"//www.python.org:80", missing_as_none=missing_as_none),
|
||||
(none, b'www.python.org:80', b'', none, none, none))
|
||||
self.assertEqual(urlparse(b"http://www.python.org:80", missing_as_none=missing_as_none),
|
||||
(b'http', b'www.python.org:80', b'', none, none, none))
|
||||
|
||||
def test_portseparator(self):
|
||||
@support.subTests('missing_as_none', (False, True))
|
||||
def test_portseparator(self, missing_as_none):
|
||||
# Issue 754016 makes changes for port separator ':' from scheme separator
|
||||
self.assertEqual(urllib.parse.urlparse("http:80"), ('http','','80','','',''))
|
||||
self.assertEqual(urllib.parse.urlparse("https:80"), ('https','','80','','',''))
|
||||
self.assertEqual(urllib.parse.urlparse("path:80"), ('path','','80','','',''))
|
||||
self.assertEqual(urllib.parse.urlparse("http:"),('http','','','','',''))
|
||||
self.assertEqual(urllib.parse.urlparse("https:"),('https','','','','',''))
|
||||
self.assertEqual(urllib.parse.urlparse("http://www.python.org:80"),
|
||||
('http','www.python.org:80','','','',''))
|
||||
none = None if missing_as_none else ''
|
||||
self.assertEqual(urlparse("http:80", missing_as_none=missing_as_none),
|
||||
('http', none, '80', none, none, none))
|
||||
self.assertEqual(urlparse("https:80", missing_as_none=missing_as_none),
|
||||
('https', none, '80', none, none, none))
|
||||
self.assertEqual(urlparse("path:80", missing_as_none=missing_as_none),
|
||||
('path', none, '80', none, none, none))
|
||||
self.assertEqual(urlparse("http:", missing_as_none=missing_as_none),
|
||||
('http', none, '', none, none, none))
|
||||
self.assertEqual(urlparse("https:", missing_as_none=missing_as_none),
|
||||
('https', none, '', none, none, none))
|
||||
self.assertEqual(urlparse("http://www.python.org:80", missing_as_none=missing_as_none),
|
||||
('http', 'www.python.org:80', '', none, none, none))
|
||||
# As usual, need to check bytes input as well
|
||||
self.assertEqual(urllib.parse.urlparse(b"http:80"), (b'http',b'',b'80',b'',b'',b''))
|
||||
self.assertEqual(urllib.parse.urlparse(b"https:80"), (b'https',b'',b'80',b'',b'',b''))
|
||||
self.assertEqual(urllib.parse.urlparse(b"path:80"), (b'path',b'',b'80',b'',b'',b''))
|
||||
self.assertEqual(urllib.parse.urlparse(b"http:"),(b'http',b'',b'',b'',b'',b''))
|
||||
self.assertEqual(urllib.parse.urlparse(b"https:"),(b'https',b'',b'',b'',b'',b''))
|
||||
self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80"),
|
||||
(b'http',b'www.python.org:80',b'',b'',b'',b''))
|
||||
none = None if missing_as_none else b''
|
||||
self.assertEqual(urlparse(b"http:80", missing_as_none=missing_as_none),
|
||||
(b'http', none, b'80', none, none, none))
|
||||
self.assertEqual(urlparse(b"https:80", missing_as_none=missing_as_none),
|
||||
(b'https', none, b'80', none, none, none))
|
||||
self.assertEqual(urlparse(b"path:80", missing_as_none=missing_as_none),
|
||||
(b'path', none, b'80', none, none, none))
|
||||
self.assertEqual(urlparse(b"http:", missing_as_none=missing_as_none),
|
||||
(b'http', none, b'', none, none, none))
|
||||
self.assertEqual(urlparse(b"https:", missing_as_none=missing_as_none),
|
||||
(b'https', none, b'', none, none, none))
|
||||
self.assertEqual(urlparse(b"http://www.python.org:80", missing_as_none=missing_as_none),
|
||||
(b'http', b'www.python.org:80', b'', none, none, none))
|
||||
|
||||
def test_usingsys(self):
|
||||
# Issue 3314: sys module is used in the error
|
||||
self.assertRaises(TypeError, urllib.parse.urlencode, "foo")
|
||||
|
||||
def test_anyscheme(self):
|
||||
@support.subTests('missing_as_none', (False, True))
|
||||
def test_anyscheme(self, missing_as_none):
|
||||
# Issue 7904: s3://foo.com/stuff has netloc "foo.com".
|
||||
self.assertEqual(urllib.parse.urlparse("s3://foo.com/stuff"),
|
||||
('s3', 'foo.com', '/stuff', '', '', ''))
|
||||
self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff"),
|
||||
('x-newscheme', 'foo.com', '/stuff', '', '', ''))
|
||||
self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query#fragment"),
|
||||
('x-newscheme', 'foo.com', '/stuff', '', 'query', 'fragment'))
|
||||
self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query"),
|
||||
('x-newscheme', 'foo.com', '/stuff', '', 'query', ''))
|
||||
none = None if missing_as_none else ''
|
||||
self.assertEqual(urlparse("s3://foo.com/stuff", missing_as_none=missing_as_none),
|
||||
('s3', 'foo.com', '/stuff', none, none, none))
|
||||
self.assertEqual(urlparse("x-newscheme://foo.com/stuff", missing_as_none=missing_as_none),
|
||||
('x-newscheme', 'foo.com', '/stuff', none, none, none))
|
||||
self.assertEqual(urlparse("x-newscheme://foo.com/stuff?query#fragment", missing_as_none=missing_as_none),
|
||||
('x-newscheme', 'foo.com', '/stuff', none, 'query', 'fragment'))
|
||||
self.assertEqual(urlparse("x-newscheme://foo.com/stuff?query", missing_as_none=missing_as_none),
|
||||
('x-newscheme', 'foo.com', '/stuff', none, 'query', none))
|
||||
|
||||
# And for bytes...
|
||||
self.assertEqual(urllib.parse.urlparse(b"s3://foo.com/stuff"),
|
||||
(b's3', b'foo.com', b'/stuff', b'', b'', b''))
|
||||
self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff"),
|
||||
(b'x-newscheme', b'foo.com', b'/stuff', b'', b'', b''))
|
||||
self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query#fragment"),
|
||||
(b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b'fragment'))
|
||||
self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query"),
|
||||
(b'x-newscheme', b'foo.com', b'/stuff', b'', b'query', b''))
|
||||
none = None if missing_as_none else b''
|
||||
self.assertEqual(urlparse(b"s3://foo.com/stuff", missing_as_none=missing_as_none),
|
||||
(b's3', b'foo.com', b'/stuff', none, none, none))
|
||||
self.assertEqual(urlparse(b"x-newscheme://foo.com/stuff", missing_as_none=missing_as_none),
|
||||
(b'x-newscheme', b'foo.com', b'/stuff', none, none, none))
|
||||
self.assertEqual(urlparse(b"x-newscheme://foo.com/stuff?query#fragment", missing_as_none=missing_as_none),
|
||||
(b'x-newscheme', b'foo.com', b'/stuff', none, b'query', b'fragment'))
|
||||
self.assertEqual(urlparse(b"x-newscheme://foo.com/stuff?query", missing_as_none=missing_as_none),
|
||||
(b'x-newscheme', b'foo.com', b'/stuff', none, b'query', none))
|
||||
|
||||
@support.subTests('func', (urllib.parse.urlparse, urllib.parse.urlsplit))
|
||||
def test_default_scheme(self, func):
|
||||
@@ -1125,8 +1188,11 @@ class UrlParseTestCase(unittest.TestCase):
|
||||
self.assertEqual(func("path", scheme="ftp").scheme, "ftp")
|
||||
self.assertEqual(func(b"path", scheme=b"ftp").scheme, b"ftp")
|
||||
self.assertEqual(func("path").scheme, "")
|
||||
self.assertEqual(func("path", missing_as_none=True).scheme, None)
|
||||
self.assertEqual(func(b"path").scheme, b"")
|
||||
self.assertEqual(func(b"path", missing_as_none=True).scheme, None)
|
||||
self.assertEqual(func(b"path", "").scheme, b"")
|
||||
self.assertEqual(func(b"path", "", missing_as_none=True).scheme, b"")
|
||||
|
||||
@support.subTests('url,attr,expected_frag', (
|
||||
("http:#frag", "path", "frag"),
|
||||
@@ -1151,9 +1217,16 @@ class UrlParseTestCase(unittest.TestCase):
|
||||
"#" + expected_frag)
|
||||
self.assertEqual(func(url, "", False).fragment, "")
|
||||
|
||||
result = func(url, allow_fragments=False, missing_as_none=True)
|
||||
self.assertIsNone(result.fragment)
|
||||
self.assertTrue(
|
||||
getattr(result, attr).endswith("#" + expected_frag))
|
||||
self.assertIsNone(func(url, "", False, missing_as_none=True).fragment)
|
||||
|
||||
result = func(url, allow_fragments=True)
|
||||
self.assertEqual(result.fragment, expected_frag)
|
||||
self.assertNotEndsWith(getattr(result, attr), expected_frag)
|
||||
self.assertFalse(
|
||||
getattr(result, attr).endswith(expected_frag))
|
||||
self.assertEqual(func(url, "", True).fragment,
|
||||
expected_frag)
|
||||
self.assertEqual(func(url).fragment, expected_frag)
|
||||
@@ -1182,19 +1255,10 @@ class UrlParseTestCase(unittest.TestCase):
|
||||
with self.assertRaisesRegex(TypeError, "Cannot mix str"):
|
||||
urllib.parse.urljoin(b"http://python.org", "http://python.org")
|
||||
|
||||
@support.subTests('result_type', [
|
||||
urllib.parse.DefragResult,
|
||||
urllib.parse.SplitResult,
|
||||
urllib.parse.ParseResult,
|
||||
])
|
||||
def test_result_pairs(self, result_type):
|
||||
# Check encoding and decoding between result pairs
|
||||
str_type = result_type
|
||||
num_args = len(str_type._fields)
|
||||
def _check_result_type(self, str_type, str_args):
|
||||
bytes_type = str_type._encoded_counterpart
|
||||
self.assertIs(bytes_type._decoded_counterpart, str_type)
|
||||
str_args = ('',) * num_args
|
||||
bytes_args = (b'',) * num_args
|
||||
bytes_args = tuple_encode(str_args)
|
||||
str_result = str_type(*str_args)
|
||||
bytes_result = bytes_type(*bytes_args)
|
||||
encoding = 'ascii'
|
||||
@@ -1213,6 +1277,169 @@ class UrlParseTestCase(unittest.TestCase):
|
||||
self.assertEqual(str_result.encode(encoding), bytes_result)
|
||||
self.assertEqual(str_result.encode(encoding, errors), bytes_args)
|
||||
self.assertEqual(str_result.encode(encoding, errors), bytes_result)
|
||||
for result in str_result, bytes_result:
|
||||
self.assertEqual(copy.copy(result), result)
|
||||
self.assertEqual(copy.deepcopy(result), result)
|
||||
self.assertEqual(copy.replace(result), result)
|
||||
self.assertEqual(result._replace(), result)
|
||||
|
||||
def test_result_pairs__(self):
|
||||
# Check encoding and decoding between result pairs
|
||||
self._check_result_type(urllib.parse.DefragResult, ('', ''))
|
||||
self._check_result_type(urllib.parse.DefragResult, ('', None))
|
||||
self._check_result_type(urllib.parse.SplitResult, ('', '', '', '', ''))
|
||||
self._check_result_type(urllib.parse.SplitResult, (None, None, '', None, None))
|
||||
self._check_result_type(urllib.parse.ParseResult, ('', '', '', '', '', ''))
|
||||
self._check_result_type(urllib.parse.ParseResult, (None, None, '', None, None, None))
|
||||
|
||||
def test_result_encoding_decoding(self):
|
||||
def check(str_result, bytes_result):
|
||||
self.assertEqual(str_result.encode(), bytes_result)
|
||||
self.assertEqual(str_result.encode().geturl(), bytes_result.geturl())
|
||||
self.assertEqual(bytes_result.decode(), str_result)
|
||||
self.assertEqual(bytes_result.decode().geturl(), str_result.geturl())
|
||||
|
||||
url = 'http://example.com/?#'
|
||||
burl = url.encode()
|
||||
for func in urldefrag, urlsplit, urlparse:
|
||||
check(func(url, missing_as_none=True), func(burl, missing_as_none=True))
|
||||
check(func(url), func(burl))
|
||||
|
||||
def test_result_copying(self):
|
||||
def check(result):
|
||||
result2 = copy.copy(result)
|
||||
self.assertEqual(result2, result)
|
||||
self.assertEqual(result2.geturl(), result.geturl())
|
||||
result2 = copy.deepcopy(result)
|
||||
self.assertEqual(result2, result)
|
||||
self.assertEqual(result2.geturl(), result.geturl())
|
||||
result2 = copy.replace(result)
|
||||
self.assertEqual(result2, result)
|
||||
self.assertEqual(result2.geturl(), result.geturl())
|
||||
result2 = result._replace()
|
||||
self.assertEqual(result2, result)
|
||||
self.assertEqual(result2.geturl(), result.geturl())
|
||||
|
||||
url = 'http://example.com/?#'
|
||||
burl = url.encode()
|
||||
for func in urldefrag, urlsplit, urlparse:
|
||||
check(func(url))
|
||||
check(func(url, missing_as_none=True))
|
||||
check(func(burl))
|
||||
check(func(burl, missing_as_none=True))
|
||||
|
||||
def test_result_pickling(self):
|
||||
def check(result):
|
||||
for proto in range(pickle.HIGHEST_PROTOCOL + 1):
|
||||
pickled = pickle.dumps(result, proto)
|
||||
result2 = pickle.loads(pickled)
|
||||
self.assertEqual(result2, result)
|
||||
self.assertEqual(result2.geturl(), result.geturl())
|
||||
|
||||
url = 'http://example.com/?#'
|
||||
burl = url.encode()
|
||||
for func in urldefrag, urlsplit, urlparse:
|
||||
check(func(url))
|
||||
check(func(url, missing_as_none=True))
|
||||
check(func(burl))
|
||||
check(func(burl, missing_as_none=True))
|
||||
|
||||
def test_result_compat_unpickling(self):
|
||||
def check(result, pickles):
|
||||
for pickled in pickles:
|
||||
result2 = pickle.loads(pickled)
|
||||
self.assertEqual(result2, result)
|
||||
self.assertEqual(result2.geturl(), result.geturl())
|
||||
|
||||
url = 'http://example.com/?#'
|
||||
burl = url.encode()
|
||||
# Pre-3.15 data.
|
||||
check(urldefrag(url), (
|
||||
b'ccopy_reg\n_reconstructor\n(curlparse\nDefragResult\nc__builtin__\ntuple\n(Vhttp://example.com/?\nV\nttR.',
|
||||
b'ccopy_reg\n_reconstructor\n(curlparse\nDefragResult\nc__builtin__\ntuple\n(X\x14\x00\x00\x00http://example.com/?X\x00\x00\x00\x00ttR.',
|
||||
b'\x80\x02curlparse\nDefragResult\nX\x14\x00\x00\x00http://example.com/?X\x00\x00\x00\x00\x86\x81.',
|
||||
b'\x80\x03curllib.parse\nDefragResult\nX\x14\x00\x00\x00http://example.com/?X\x00\x00\x00\x00\x86\x81.',
|
||||
b'\x80\x04\x958\x00\x00\x00\x00\x00\x00\x00\x8c\x0curllib.parse\x8c\x0cDefragResult\x93\x8c\x14http://example.com/?\x8c\x00\x86\x81.',
|
||||
))
|
||||
check(urldefrag(burl), (
|
||||
b'ccopy_reg\n_reconstructor\n(curlparse\nDefragResultBytes\nc__builtin__\ntuple\n(c_codecs\nencode\n(Vhttp://example.com/?\nVlatin1\ntRc__builtin__\nbytes\n(tRttR.',
|
||||
b'ccopy_reg\n_reconstructor\n(curlparse\nDefragResultBytes\nc__builtin__\ntuple\n(c_codecs\nencode\n(X\x14\x00\x00\x00http://example.com/?X\x06\x00\x00\x00latin1tRc__builtin__\nbytes\n)RttR.',
|
||||
b'\x80\x02curlparse\nDefragResultBytes\nc_codecs\nencode\nX\x14\x00\x00\x00http://example.com/?X\x06\x00\x00\x00latin1\x86Rc__builtin__\nbytes\n)R\x86\x81.',
|
||||
b'\x80\x03curllib.parse\nDefragResultBytes\nC\x14http://example.com/?C\x00\x86\x81.',
|
||||
b'\x80\x04\x95=\x00\x00\x00\x00\x00\x00\x00\x8c\x0curllib.parse\x8c\x11DefragResultBytes\x93C\x14http://example.com/?C\x00\x86\x81.',
|
||||
))
|
||||
check(urlsplit(url), (
|
||||
b'ccopy_reg\n_reconstructor\n(curlparse\nSplitResult\nc__builtin__\ntuple\n(Vhttp\nVexample.com\nV/\nV\np0\ng0\nttR.',
|
||||
b'ccopy_reg\n_reconstructor\n(curlparse\nSplitResult\nc__builtin__\ntuple\n(X\x04\x00\x00\x00httpX\x0b\x00\x00\x00example.comX\x01\x00\x00\x00/X\x00\x00\x00\x00q\x00h\x00ttR.',
|
||||
b'\x80\x02curlparse\nSplitResult\n(X\x04\x00\x00\x00httpX\x0b\x00\x00\x00example.comX\x01\x00\x00\x00/X\x00\x00\x00\x00q\x00h\x00t\x81.',
|
||||
b'\x80\x03curllib.parse\nSplitResult\n(X\x04\x00\x00\x00httpX\x0b\x00\x00\x00example.comX\x01\x00\x00\x00/X\x00\x00\x00\x00q\x00h\x00t\x81.',
|
||||
b'\x80\x04\x95;\x00\x00\x00\x00\x00\x00\x00\x8c\x0curllib.parse\x8c\x0bSplitResult\x93(\x8c\x04http\x8c\x0bexample.com\x8c\x01/\x8c\x00\x94h\x00t\x81.',
|
||||
))
|
||||
check(urlsplit(burl), (
|
||||
b'ccopy_reg\n_reconstructor\n(curlparse\nSplitResultBytes\nc__builtin__\ntuple\n(c_codecs\nencode\np0\n(Vhttp\nVlatin1\np1\ntRg0\n(Vexample.com\ng1\ntRg0\n(V/\ng1\ntRc__builtin__\nbytes\n(tRp2\ng2\nttR.',
|
||||
b'ccopy_reg\n_reconstructor\n(curlparse\nSplitResultBytes\nc__builtin__\ntuple\n(c_codecs\nencode\nq\x00(X\x04\x00\x00\x00httpX\x06\x00\x00\x00latin1q\x01tRh\x00(X\x0b\x00\x00\x00example.comh\x01tRh\x00(X\x01\x00\x00\x00/h\x01tRc__builtin__\nbytes\n)Rq\x02h\x02ttR.',
|
||||
b'\x80\x02curlparse\nSplitResultBytes\n(c_codecs\nencode\nq\x00X\x04\x00\x00\x00httpX\x06\x00\x00\x00latin1q\x01\x86Rh\x00X\x0b\x00\x00\x00example.comh\x01\x86Rh\x00X\x01\x00\x00\x00/h\x01\x86Rc__builtin__\nbytes\n)Rq\x02h\x02t\x81.',
|
||||
b'\x80\x03curllib.parse\nSplitResultBytes\n(C\x04httpC\x0bexample.comC\x01/C\x00q\x00h\x00t\x81.',
|
||||
b'\x80\x04\x95@\x00\x00\x00\x00\x00\x00\x00\x8c\x0curllib.parse\x8c\x10SplitResultBytes\x93(C\x04httpC\x0bexample.comC\x01/C\x00\x94h\x00t\x81.',
|
||||
))
|
||||
check(urlparse(url), (
|
||||
b'ccopy_reg\n_reconstructor\n(curlparse\nParseResult\nc__builtin__\ntuple\n(Vhttp\nVexample.com\nV/\nV\np0\ng0\ng0\nttR.',
|
||||
b'ccopy_reg\n_reconstructor\n(curlparse\nParseResult\nc__builtin__\ntuple\n(X\x04\x00\x00\x00httpX\x0b\x00\x00\x00example.comX\x01\x00\x00\x00/X\x00\x00\x00\x00q\x00h\x00h\x00ttR.',
|
||||
b'\x80\x02curlparse\nParseResult\n(X\x04\x00\x00\x00httpX\x0b\x00\x00\x00example.comX\x01\x00\x00\x00/X\x00\x00\x00\x00q\x00h\x00h\x00t\x81.',
|
||||
b'\x80\x03curllib.parse\nParseResult\n(X\x04\x00\x00\x00httpX\x0b\x00\x00\x00example.comX\x01\x00\x00\x00/X\x00\x00\x00\x00q\x00h\x00h\x00t\x81.',
|
||||
b'\x80\x04\x95=\x00\x00\x00\x00\x00\x00\x00\x8c\x0curllib.parse\x8c\x0bParseResult\x93(\x8c\x04http\x8c\x0bexample.com\x8c\x01/\x8c\x00\x94h\x00h\x00t\x81.',
|
||||
))
|
||||
check(urlparse(burl), (
|
||||
b'ccopy_reg\n_reconstructor\n(curlparse\nParseResultBytes\nc__builtin__\ntuple\n(c_codecs\nencode\np0\n(Vhttp\nVlatin1\np1\ntRg0\n(Vexample.com\ng1\ntRg0\n(V/\ng1\ntRc__builtin__\nbytes\n(tRp2\ng2\ng2\nttR.',
|
||||
b'ccopy_reg\n_reconstructor\n(curlparse\nParseResultBytes\nc__builtin__\ntuple\n(c_codecs\nencode\nq\x00(X\x04\x00\x00\x00httpX\x06\x00\x00\x00latin1q\x01tRh\x00(X\x0b\x00\x00\x00example.comh\x01tRh\x00(X\x01\x00\x00\x00/h\x01tRc__builtin__\nbytes\n)Rq\x02h\x02h\x02ttR.',
|
||||
b'\x80\x02curlparse\nParseResultBytes\n(c_codecs\nencode\nq\x00X\x04\x00\x00\x00httpX\x06\x00\x00\x00latin1q\x01\x86Rh\x00X\x0b\x00\x00\x00example.comh\x01\x86Rh\x00X\x01\x00\x00\x00/h\x01\x86Rc__builtin__\nbytes\n)Rq\x02h\x02h\x02t\x81.',
|
||||
b'\x80\x03curllib.parse\nParseResultBytes\n(C\x04httpC\x0bexample.comC\x01/C\x00q\x00h\x00h\x00t\x81.',
|
||||
b'\x80\x04\x95B\x00\x00\x00\x00\x00\x00\x00\x8c\x0curllib.parse\x8c\x10ParseResultBytes\x93(C\x04httpC\x0bexample.comC\x01/C\x00\x94h\x00h\x00t\x81.',
|
||||
))
|
||||
|
||||
# 3.15 data with missing_as_none=True.
|
||||
check(urldefrag(url, missing_as_none=True), (
|
||||
b'ccopy_reg\n_reconstructor\n(curlparse\nDefragResult\nc__builtin__\ntuple\n(Vhttp://example.com/?\nV\nttR(N(dV_keep_empty\nI01\nstb.',
|
||||
b'ccopy_reg\n_reconstructor\n(curlparse\nDefragResult\nc__builtin__\ntuple\n(X\x14\x00\x00\x00http://example.com/?X\x00\x00\x00\x00ttR(N}X\x0b\x00\x00\x00_keep_emptyI01\nstb.',
|
||||
b'\x80\x02curlparse\nDefragResult\nX\x14\x00\x00\x00http://example.com/?X\x00\x00\x00\x00\x86\x81N}X\x0b\x00\x00\x00_keep_empty\x88s\x86b.',
|
||||
b'\x80\x03curllib.parse\nDefragResult\nX\x14\x00\x00\x00http://example.com/?X\x00\x00\x00\x00\x86\x81N}X\x0b\x00\x00\x00_keep_empty\x88s\x86b.',
|
||||
b'\x80\x04\x95K\x00\x00\x00\x00\x00\x00\x00\x8c\x0curllib.parse\x8c\x0cDefragResult\x93\x8c\x14http://example.com/?\x8c\x00\x86\x81N}\x8c\x0b_keep_empty\x88s\x86b.',
|
||||
))
|
||||
check(urldefrag(burl, missing_as_none=True), (
|
||||
b'ccopy_reg\n_reconstructor\n(curlparse\nDefragResultBytes\nc__builtin__\ntuple\n(c_codecs\nencode\n(Vhttp://example.com/?\nVlatin1\ntRc__builtin__\nbytes\n(tRttR(N(dV_keep_empty\nI01\nstb.',
|
||||
b'ccopy_reg\n_reconstructor\n(curlparse\nDefragResultBytes\nc__builtin__\ntuple\n(c_codecs\nencode\n(X\x14\x00\x00\x00http://example.com/?X\x06\x00\x00\x00latin1tRc__builtin__\nbytes\n)RttR(N}X\x0b\x00\x00\x00_keep_emptyI01\nstb.',
|
||||
b'\x80\x02curlparse\nDefragResultBytes\nc_codecs\nencode\nX\x14\x00\x00\x00http://example.com/?X\x06\x00\x00\x00latin1\x86Rc__builtin__\nbytes\n)R\x86\x81N}X\x0b\x00\x00\x00_keep_empty\x88s\x86b.',
|
||||
b'\x80\x03curllib.parse\nDefragResultBytes\nC\x14http://example.com/?C\x00\x86\x81N}X\x0b\x00\x00\x00_keep_empty\x88s\x86b.',
|
||||
b'\x80\x04\x95P\x00\x00\x00\x00\x00\x00\x00\x8c\x0curllib.parse\x8c\x11DefragResultBytes\x93C\x14http://example.com/?C\x00\x86\x81N}\x8c\x0b_keep_empty\x88s\x86b.',
|
||||
))
|
||||
check(urlsplit(url, missing_as_none=True), (
|
||||
b'ccopy_reg\n_reconstructor\n(curlparse\nSplitResult\nc__builtin__\ntuple\n(Vhttp\nVexample.com\nV/\nV\np0\ng0\nttR(N(dV_keep_empty\nI01\nstb.',
|
||||
b'ccopy_reg\n_reconstructor\n(curlparse\nSplitResult\nc__builtin__\ntuple\n(X\x04\x00\x00\x00httpX\x0b\x00\x00\x00example.comX\x01\x00\x00\x00/X\x00\x00\x00\x00q\x00h\x00ttR(N}X\x0b\x00\x00\x00_keep_emptyI01\nstb.',
|
||||
b'\x80\x02curlparse\nSplitResult\n(X\x04\x00\x00\x00httpX\x0b\x00\x00\x00example.comX\x01\x00\x00\x00/X\x00\x00\x00\x00q\x00h\x00t\x81N}X\x0b\x00\x00\x00_keep_empty\x88s\x86b.',
|
||||
b'\x80\x03curllib.parse\nSplitResult\n(X\x04\x00\x00\x00httpX\x0b\x00\x00\x00example.comX\x01\x00\x00\x00/X\x00\x00\x00\x00q\x00h\x00t\x81N}X\x0b\x00\x00\x00_keep_empty\x88s\x86b.',
|
||||
b'\x80\x04\x95N\x00\x00\x00\x00\x00\x00\x00\x8c\x0curllib.parse\x8c\x0bSplitResult\x93(\x8c\x04http\x8c\x0bexample.com\x8c\x01/\x8c\x00\x94h\x00t\x81N}\x8c\x0b_keep_empty\x88s\x86b.',
|
||||
))
|
||||
check(urlsplit(burl, missing_as_none=True), (
|
||||
b'ccopy_reg\n_reconstructor\n(curlparse\nSplitResultBytes\nc__builtin__\ntuple\n(c_codecs\nencode\np0\n(Vhttp\nVlatin1\np1\ntRg0\n(Vexample.com\ng1\ntRg0\n(V/\ng1\ntRc__builtin__\nbytes\n(tRp2\ng2\nttR(N(dV_keep_empty\nI01\nstb.',
|
||||
b'ccopy_reg\n_reconstructor\n(curlparse\nSplitResultBytes\nc__builtin__\ntuple\n(c_codecs\nencode\nq\x00(X\x04\x00\x00\x00httpX\x06\x00\x00\x00latin1q\x01tRh\x00(X\x0b\x00\x00\x00example.comh\x01tRh\x00(X\x01\x00\x00\x00/h\x01tRc__builtin__\nbytes\n)Rq\x02h\x02ttR(N}X\x0b\x00\x00\x00_keep_emptyI01\nstb.',
|
||||
b'\x80\x02curlparse\nSplitResultBytes\n(c_codecs\nencode\nq\x00X\x04\x00\x00\x00httpX\x06\x00\x00\x00latin1q\x01\x86Rh\x00X\x0b\x00\x00\x00example.comh\x01\x86Rh\x00X\x01\x00\x00\x00/h\x01\x86Rc__builtin__\nbytes\n)Rq\x02h\x02t\x81N}X\x0b\x00\x00\x00_keep_empty\x88s\x86b.',
|
||||
b'\x80\x03curllib.parse\nSplitResultBytes\n(C\x04httpC\x0bexample.comC\x01/C\x00q\x00h\x00t\x81N}X\x0b\x00\x00\x00_keep_empty\x88s\x86b.',
|
||||
b'\x80\x04\x95S\x00\x00\x00\x00\x00\x00\x00\x8c\x0curllib.parse\x8c\x10SplitResultBytes\x93(C\x04httpC\x0bexample.comC\x01/C\x00\x94h\x00t\x81N}\x8c\x0b_keep_empty\x88s\x86b.',
|
||||
))
|
||||
check(urlparse(url, missing_as_none=True), (
|
||||
b'ccopy_reg\n_reconstructor\n(curlparse\nParseResult\nc__builtin__\ntuple\n(Vhttp\nVexample.com\nV/\nNV\np0\ng0\nttR(N(dV_keep_empty\nI01\nstb.',
|
||||
b'ccopy_reg\n_reconstructor\n(curlparse\nParseResult\nc__builtin__\ntuple\n(X\x04\x00\x00\x00httpX\x0b\x00\x00\x00example.comX\x01\x00\x00\x00/NX\x00\x00\x00\x00q\x00h\x00ttR(N}X\x0b\x00\x00\x00_keep_emptyI01\nstb.',
|
||||
b'\x80\x02curlparse\nParseResult\n(X\x04\x00\x00\x00httpX\x0b\x00\x00\x00example.comX\x01\x00\x00\x00/NX\x00\x00\x00\x00q\x00h\x00t\x81N}X\x0b\x00\x00\x00_keep_empty\x88s\x86b.',
|
||||
b'\x80\x03curllib.parse\nParseResult\n(X\x04\x00\x00\x00httpX\x0b\x00\x00\x00example.comX\x01\x00\x00\x00/NX\x00\x00\x00\x00q\x00h\x00t\x81N}X\x0b\x00\x00\x00_keep_empty\x88s\x86b.',
|
||||
b'\x80\x04\x95O\x00\x00\x00\x00\x00\x00\x00\x8c\x0curllib.parse\x8c\x0bParseResult\x93(\x8c\x04http\x8c\x0bexample.com\x8c\x01/N\x8c\x00\x94h\x00t\x81N}\x8c\x0b_keep_empty\x88s\x86b.',
|
||||
))
|
||||
check(urlparse(burl, missing_as_none=True), (
|
||||
b'ccopy_reg\n_reconstructor\n(curlparse\nParseResultBytes\nc__builtin__\ntuple\n(c_codecs\nencode\np0\n(Vhttp\nVlatin1\np1\ntRg0\n(Vexample.com\ng1\ntRg0\n(V/\ng1\ntRNc__builtin__\nbytes\n(tRp2\ng2\nttR(N(dV_keep_empty\nI01\nstb.',
|
||||
b'ccopy_reg\n_reconstructor\n(curlparse\nParseResultBytes\nc__builtin__\ntuple\n(c_codecs\nencode\nq\x00(X\x04\x00\x00\x00httpX\x06\x00\x00\x00latin1q\x01tRh\x00(X\x0b\x00\x00\x00example.comh\x01tRh\x00(X\x01\x00\x00\x00/h\x01tRNc__builtin__\nbytes\n)Rq\x02h\x02ttR(N}X\x0b\x00\x00\x00_keep_emptyI01\nstb.',
|
||||
b'\x80\x02curlparse\nParseResultBytes\n(c_codecs\nencode\nq\x00X\x04\x00\x00\x00httpX\x06\x00\x00\x00latin1q\x01\x86Rh\x00X\x0b\x00\x00\x00example.comh\x01\x86Rh\x00X\x01\x00\x00\x00/h\x01\x86RNc__builtin__\nbytes\n)Rq\x02h\x02t\x81N}X\x0b\x00\x00\x00_keep_empty\x88s\x86b.',
|
||||
b'\x80\x03curllib.parse\nParseResultBytes\n(C\x04httpC\x0bexample.comC\x01/NC\x00q\x00h\x00t\x81N}X\x0b\x00\x00\x00_keep_empty\x88s\x86b.',
|
||||
b'\x80\x04\x95T\x00\x00\x00\x00\x00\x00\x00\x8c\x0curllib.parse\x8c\x10ParseResultBytes\x93(C\x04httpC\x0bexample.comC\x01/NC\x00\x94h\x00t\x81N}\x8c\x0b_keep_empty\x88s\x86b.',
|
||||
))
|
||||
|
||||
def test_parse_qs_encoding(self):
|
||||
result = urllib.parse.parse_qs("key=\u0141%E9", encoding="latin-1")
|
||||
@@ -1457,6 +1684,11 @@ class UrlParseTestCase(unittest.TestCase):
|
||||
self.assertEqual(p1.path, '+1-201-555-0123')
|
||||
self.assertEqual(p1.params, '')
|
||||
|
||||
p1 = urllib.parse.urlparse('tel:+1-201-555-0123', missing_as_none=True)
|
||||
self.assertEqual(p1.scheme, 'tel')
|
||||
self.assertEqual(p1.path, '+1-201-555-0123')
|
||||
self.assertEqual(p1.params, None)
|
||||
|
||||
p1 = urllib.parse.urlparse('tel:7042;phone-context=example.com')
|
||||
self.assertEqual(p1.scheme, 'tel')
|
||||
self.assertEqual(p1.path, '7042')
|
||||
@@ -1757,6 +1989,8 @@ class DeprecationTest(unittest.TestCase):
|
||||
|
||||
|
||||
def str_encode(s):
|
||||
if s is None:
|
||||
return None
|
||||
return s.encode('ascii')
|
||||
|
||||
def tuple_encode(t):
|
||||
|
||||
+185
-65
@@ -112,7 +112,10 @@ def _encode_result(obj, encoding=_implicit_encoding,
|
||||
|
||||
def _decode_args(args, encoding=_implicit_encoding,
|
||||
errors=_implicit_errors):
|
||||
return tuple(x.decode(encoding, errors) if x else '' for x in args)
|
||||
return tuple(x.decode(encoding, errors) if x
|
||||
else '' if x is not None
|
||||
else None
|
||||
for x in args)
|
||||
|
||||
def _coerce_args(*args):
|
||||
# Invokes decode if necessary to create str args
|
||||
@@ -120,13 +123,20 @@ def _coerce_args(*args):
|
||||
# an appropriate result coercion function
|
||||
# - noop for str inputs
|
||||
# - encoding function otherwise
|
||||
str_input = isinstance(args[0], str)
|
||||
for arg in args[1:]:
|
||||
# We special-case the empty string to support the
|
||||
# "scheme=''" default argument to some functions
|
||||
if arg and isinstance(arg, str) != str_input:
|
||||
raise TypeError("Cannot mix str and non-str arguments")
|
||||
if str_input:
|
||||
str_input = None
|
||||
for arg in args:
|
||||
if arg:
|
||||
if str_input is None:
|
||||
str_input = isinstance(arg, str)
|
||||
else:
|
||||
if isinstance(arg, str) != str_input:
|
||||
raise TypeError("Cannot mix str and non-str arguments")
|
||||
if str_input is None:
|
||||
for arg in args:
|
||||
if arg is not None:
|
||||
str_input = isinstance(arg, str)
|
||||
break
|
||||
if str_input is not False:
|
||||
return args + (_noop,)
|
||||
return _decode_args(args) + (_encode_result,)
|
||||
|
||||
@@ -136,7 +146,14 @@ class _ResultMixinStr(object):
|
||||
__slots__ = ()
|
||||
|
||||
def encode(self, encoding='ascii', errors='strict'):
|
||||
return self._encoded_counterpart(*(x.encode(encoding, errors) for x in self))
|
||||
result = self._encoded_counterpart(*(x.encode(encoding, errors)
|
||||
if x is not None else None
|
||||
for x in self))
|
||||
try:
|
||||
result._keep_empty = self._keep_empty
|
||||
except AttributeError:
|
||||
pass
|
||||
return result
|
||||
|
||||
|
||||
class _ResultMixinBytes(object):
|
||||
@@ -144,7 +161,14 @@ class _ResultMixinBytes(object):
|
||||
__slots__ = ()
|
||||
|
||||
def decode(self, encoding='ascii', errors='strict'):
|
||||
return self._decoded_counterpart(*(x.decode(encoding, errors) for x in self))
|
||||
result = self._decoded_counterpart(*(x.decode(encoding, errors)
|
||||
if x is not None else None
|
||||
for x in self))
|
||||
try:
|
||||
result._keep_empty = self._keep_empty
|
||||
except AttributeError:
|
||||
pass
|
||||
return result
|
||||
|
||||
|
||||
class _NetlocResultMixinBase(object):
|
||||
@@ -191,6 +215,8 @@ class _NetlocResultMixinStr(_NetlocResultMixinBase, _ResultMixinStr):
|
||||
@property
|
||||
def _userinfo(self):
|
||||
netloc = self.netloc
|
||||
if netloc is None:
|
||||
return None, None
|
||||
userinfo, have_info, hostinfo = netloc.rpartition('@')
|
||||
if have_info:
|
||||
username, have_password, password = userinfo.partition(':')
|
||||
@@ -203,6 +229,8 @@ class _NetlocResultMixinStr(_NetlocResultMixinBase, _ResultMixinStr):
|
||||
@property
|
||||
def _hostinfo(self):
|
||||
netloc = self.netloc
|
||||
if netloc is None:
|
||||
return None, None
|
||||
_, _, hostinfo = netloc.rpartition('@')
|
||||
_, have_open_br, bracketed = hostinfo.partition('[')
|
||||
if have_open_br:
|
||||
@@ -221,6 +249,8 @@ class _NetlocResultMixinBytes(_NetlocResultMixinBase, _ResultMixinBytes):
|
||||
@property
|
||||
def _userinfo(self):
|
||||
netloc = self.netloc
|
||||
if netloc is None:
|
||||
return None, None
|
||||
userinfo, have_info, hostinfo = netloc.rpartition(b'@')
|
||||
if have_info:
|
||||
username, have_password, password = userinfo.partition(b':')
|
||||
@@ -233,6 +263,8 @@ class _NetlocResultMixinBytes(_NetlocResultMixinBase, _ResultMixinBytes):
|
||||
@property
|
||||
def _hostinfo(self):
|
||||
netloc = self.netloc
|
||||
if netloc is None:
|
||||
return None, None
|
||||
_, _, hostinfo = netloc.rpartition(b'@')
|
||||
_, have_open_br, bracketed = hostinfo.partition(b'[')
|
||||
if have_open_br:
|
||||
@@ -245,11 +277,70 @@ class _NetlocResultMixinBytes(_NetlocResultMixinBase, _ResultMixinBytes):
|
||||
return hostname, port
|
||||
|
||||
|
||||
_DefragResultBase = namedtuple('_DefragResultBase', 'url fragment')
|
||||
_SplitResultBase = namedtuple(
|
||||
'_SplitResultBase', 'scheme netloc path query fragment')
|
||||
_ParseResultBase = namedtuple(
|
||||
'_ParseResultBase', 'scheme netloc path params query fragment')
|
||||
_UNSPECIFIED = ['not specified']
|
||||
_MISSING_AS_NONE_DEFAULT = False
|
||||
|
||||
class _ResultBase:
|
||||
__slots__ = ()
|
||||
|
||||
def __replace__(self, /, **kwargs):
|
||||
result = super().__replace__(**kwargs)
|
||||
try:
|
||||
result._keep_empty = self._keep_empty
|
||||
except AttributeError:
|
||||
pass
|
||||
return result
|
||||
|
||||
def _replace(self, /, **kwargs):
|
||||
result = super()._replace(**kwargs)
|
||||
try:
|
||||
result._keep_empty = self._keep_empty
|
||||
except AttributeError:
|
||||
pass
|
||||
return result
|
||||
|
||||
def __copy__(self):
|
||||
return self
|
||||
|
||||
def __deepcopy__(self, memo):
|
||||
return self
|
||||
|
||||
def __getstate__(self):
|
||||
state = super().__getstate__()
|
||||
try:
|
||||
if state[1]['_keep_empty'] == _MISSING_AS_NONE_DEFAULT:
|
||||
del state[1]['_keep_empty']
|
||||
if state == (None, {}):
|
||||
state = None
|
||||
except LookupError:
|
||||
pass
|
||||
return state
|
||||
|
||||
|
||||
class _DefragResultBase(_ResultBase, namedtuple('_DefragResultBase', 'url fragment')):
|
||||
__slots__ = ('_keep_empty',)
|
||||
|
||||
def geturl(self):
|
||||
if self.fragment or (self.fragment is not None and
|
||||
getattr(self, '_keep_empty', _MISSING_AS_NONE_DEFAULT)):
|
||||
return self.url + self._HASH + self.fragment
|
||||
else:
|
||||
return self.url
|
||||
|
||||
class _SplitResultBase(_ResultBase, namedtuple(
|
||||
'_SplitResultBase', 'scheme netloc path query fragment')):
|
||||
__slots__ = ('_keep_empty',)
|
||||
|
||||
def geturl(self):
|
||||
return urlunsplit(self)
|
||||
|
||||
class _ParseResultBase(_ResultBase, namedtuple(
|
||||
'_ParseResultBase', 'scheme netloc path params query fragment')):
|
||||
__slots__ = ('_keep_empty',)
|
||||
|
||||
def geturl(self):
|
||||
return urlunparse(self)
|
||||
|
||||
|
||||
_DefragResultBase.__doc__ = """
|
||||
DefragResult(url, fragment)
|
||||
@@ -320,40 +411,24 @@ ResultBase = _NetlocResultMixinStr
|
||||
# Structured result objects for string data
|
||||
class DefragResult(_DefragResultBase, _ResultMixinStr):
|
||||
__slots__ = ()
|
||||
def geturl(self):
|
||||
if self.fragment:
|
||||
return self.url + '#' + self.fragment
|
||||
else:
|
||||
return self.url
|
||||
_HASH = '#'
|
||||
|
||||
class SplitResult(_SplitResultBase, _NetlocResultMixinStr):
|
||||
__slots__ = ()
|
||||
def geturl(self):
|
||||
return urlunsplit(self)
|
||||
|
||||
class ParseResult(_ParseResultBase, _NetlocResultMixinStr):
|
||||
__slots__ = ()
|
||||
def geturl(self):
|
||||
return urlunparse(self)
|
||||
|
||||
# Structured result objects for bytes data
|
||||
class DefragResultBytes(_DefragResultBase, _ResultMixinBytes):
|
||||
__slots__ = ()
|
||||
def geturl(self):
|
||||
if self.fragment:
|
||||
return self.url + b'#' + self.fragment
|
||||
else:
|
||||
return self.url
|
||||
_HASH = b'#'
|
||||
|
||||
class SplitResultBytes(_SplitResultBase, _NetlocResultMixinBytes):
|
||||
__slots__ = ()
|
||||
def geturl(self):
|
||||
return urlunsplit(self)
|
||||
|
||||
class ParseResultBytes(_ParseResultBase, _NetlocResultMixinBytes):
|
||||
__slots__ = ()
|
||||
def geturl(self):
|
||||
return urlunparse(self)
|
||||
|
||||
# Set up the encode/decode result pairs
|
||||
def _fix_result_transcoding():
|
||||
@@ -369,7 +444,7 @@ def _fix_result_transcoding():
|
||||
_fix_result_transcoding()
|
||||
del _fix_result_transcoding
|
||||
|
||||
def urlparse(url, scheme='', allow_fragments=True):
|
||||
def urlparse(url, scheme=None, allow_fragments=True, *, missing_as_none=_MISSING_AS_NONE_DEFAULT):
|
||||
"""Parse a URL into 6 components:
|
||||
<scheme>://<netloc>/<path>;<params>?<query>#<fragment>
|
||||
|
||||
@@ -390,23 +465,33 @@ def urlparse(url, scheme='', allow_fragments=True):
|
||||
Note that % escapes are not expanded.
|
||||
"""
|
||||
url, scheme, _coerce_result = _coerce_args(url, scheme)
|
||||
if url is None:
|
||||
url = ''
|
||||
scheme, netloc, url, params, query, fragment = _urlparse(url, scheme, allow_fragments)
|
||||
result = ParseResult(scheme or '', netloc or '', url, params or '', query or '', fragment or '')
|
||||
return _coerce_result(result)
|
||||
if not missing_as_none:
|
||||
if scheme is None: scheme = ''
|
||||
if netloc is None: netloc = ''
|
||||
if params is None: params = ''
|
||||
if query is None: query = ''
|
||||
if fragment is None: fragment = ''
|
||||
result = ParseResult(scheme, netloc, url, params, query, fragment)
|
||||
result = _coerce_result(result)
|
||||
result._keep_empty = missing_as_none
|
||||
return result
|
||||
|
||||
def _urlparse(url, scheme=None, allow_fragments=True):
|
||||
scheme, netloc, url, query, fragment = _urlsplit(url, scheme, allow_fragments)
|
||||
if (scheme or '') in uses_params and ';' in url:
|
||||
url, params = _splitparams(url, allow_none=True)
|
||||
url, params = _splitparams(url, missing_as_none=True)
|
||||
else:
|
||||
params = None
|
||||
return (scheme, netloc, url, params, query, fragment)
|
||||
|
||||
def _splitparams(url, allow_none=False):
|
||||
def _splitparams(url, missing_as_none=False):
|
||||
if '/' in url:
|
||||
i = url.find(';', url.rfind('/'))
|
||||
if i < 0:
|
||||
return url, None if allow_none else ''
|
||||
return url, None if missing_as_none else ''
|
||||
else:
|
||||
i = url.find(';')
|
||||
return url[:i], url[i+1:]
|
||||
@@ -468,7 +553,7 @@ def _check_bracketed_host(hostname):
|
||||
# typed=True avoids BytesWarnings being emitted during cache key
|
||||
# comparison since this API supports both bytes and str input.
|
||||
@functools.lru_cache(typed=True)
|
||||
def urlsplit(url, scheme='', allow_fragments=True):
|
||||
def urlsplit(url, scheme=None, allow_fragments=True, *, missing_as_none=_MISSING_AS_NONE_DEFAULT):
|
||||
"""Parse a URL into 5 components:
|
||||
<scheme>://<netloc>/<path>?<query>#<fragment>
|
||||
|
||||
@@ -490,9 +575,18 @@ def urlsplit(url, scheme='', allow_fragments=True):
|
||||
"""
|
||||
|
||||
url, scheme, _coerce_result = _coerce_args(url, scheme)
|
||||
if url is None:
|
||||
url = ''
|
||||
scheme, netloc, url, query, fragment = _urlsplit(url, scheme, allow_fragments)
|
||||
v = SplitResult(scheme or '', netloc or '', url, query or '', fragment or '')
|
||||
return _coerce_result(v)
|
||||
if not missing_as_none:
|
||||
if scheme is None: scheme = ''
|
||||
if netloc is None: netloc = ''
|
||||
if query is None: query = ''
|
||||
if fragment is None: fragment = ''
|
||||
result = SplitResult(scheme, netloc, url, query, fragment)
|
||||
result = _coerce_result(result)
|
||||
result._keep_empty = missing_as_none
|
||||
return result
|
||||
|
||||
def _urlsplit(url, scheme=None, allow_fragments=True):
|
||||
# Only lstrip url as some applications rely on preserving trailing space.
|
||||
@@ -528,38 +622,61 @@ def _urlsplit(url, scheme=None, allow_fragments=True):
|
||||
_checknetloc(netloc)
|
||||
return (scheme, netloc, url, query, fragment)
|
||||
|
||||
def urlunparse(components):
|
||||
def urlunparse(components, *, keep_empty=_UNSPECIFIED):
|
||||
"""Put a parsed URL back together again. This may result in a
|
||||
slightly different, but equivalent URL, if the URL that was parsed
|
||||
originally had redundant delimiters, e.g. a ? with an empty query
|
||||
(the draft states that these are equivalent)."""
|
||||
(the draft states that these are equivalent) and keep_empty is false
|
||||
or components is the result of the urlparse() call with
|
||||
missing_as_none=False."""
|
||||
scheme, netloc, url, params, query, fragment, _coerce_result = (
|
||||
_coerce_args(*components))
|
||||
if not netloc:
|
||||
if scheme and scheme in uses_netloc and (not url or url[:1] == '/'):
|
||||
netloc = ''
|
||||
else:
|
||||
netloc = None
|
||||
if params:
|
||||
if keep_empty is _UNSPECIFIED:
|
||||
keep_empty = getattr(components, '_keep_empty', _MISSING_AS_NONE_DEFAULT)
|
||||
elif keep_empty and not getattr(components, '_keep_empty', True):
|
||||
raise ValueError('Cannot distinguish between empty and not defined '
|
||||
'URI components in the result of parsing URL with '
|
||||
'missing_as_none=False')
|
||||
if not keep_empty:
|
||||
if not netloc:
|
||||
if scheme and scheme in uses_netloc and (not url or url[:1] == '/'):
|
||||
netloc = ''
|
||||
else:
|
||||
netloc = None
|
||||
if not scheme: scheme = None
|
||||
if not params: params = None
|
||||
if not query: query = None
|
||||
if not fragment: fragment = None
|
||||
if params is not None:
|
||||
url = "%s;%s" % (url, params)
|
||||
return _coerce_result(_urlunsplit(scheme or None, netloc, url,
|
||||
query or None, fragment or None))
|
||||
return _coerce_result(_urlunsplit(scheme, netloc, url, query, fragment))
|
||||
|
||||
def urlunsplit(components):
|
||||
def urlunsplit(components, *, keep_empty=_UNSPECIFIED):
|
||||
"""Combine the elements of a tuple as returned by urlsplit() into a
|
||||
complete URL as a string. The data argument can be any five-item iterable.
|
||||
This may result in a slightly different, but equivalent URL, if the URL that
|
||||
was parsed originally had unnecessary delimiters (for example, a ? with an
|
||||
empty query; the RFC states that these are equivalent)."""
|
||||
empty query; the RFC states that these are equivalent) and keep_empty
|
||||
is false or components is the result of the urlsplit() call with
|
||||
missing_as_none=False."""
|
||||
scheme, netloc, url, query, fragment, _coerce_result = (
|
||||
_coerce_args(*components))
|
||||
if not netloc:
|
||||
if scheme and scheme in uses_netloc and (not url or url[:1] == '/'):
|
||||
netloc = ''
|
||||
else:
|
||||
netloc = None
|
||||
return _coerce_result(_urlunsplit(scheme or None, netloc, url,
|
||||
query or None, fragment or None))
|
||||
if keep_empty is _UNSPECIFIED:
|
||||
keep_empty = getattr(components, '_keep_empty', _MISSING_AS_NONE_DEFAULT)
|
||||
elif keep_empty and not getattr(components, '_keep_empty', True):
|
||||
raise ValueError('Cannot distinguish between empty and not defined '
|
||||
'URI components in the result of parsing URL with '
|
||||
'missing_as_none=False')
|
||||
if not keep_empty:
|
||||
if not netloc:
|
||||
if scheme and scheme in uses_netloc and (not url or url[:1] == '/'):
|
||||
netloc = ''
|
||||
else:
|
||||
netloc = None
|
||||
if not scheme: scheme = None
|
||||
if not query: query = None
|
||||
if not fragment: fragment = None
|
||||
return _coerce_result(_urlunsplit(scheme, netloc, url, query, fragment))
|
||||
|
||||
def _urlunsplit(scheme, netloc, url, query, fragment):
|
||||
if netloc is not None:
|
||||
@@ -647,21 +764,24 @@ def urljoin(base, url, allow_fragments=True):
|
||||
resolved_path) or '/', query, fragment))
|
||||
|
||||
|
||||
def urldefrag(url):
|
||||
def urldefrag(url, *, missing_as_none=_MISSING_AS_NONE_DEFAULT):
|
||||
"""Removes any existing fragment from URL.
|
||||
|
||||
Returns a tuple of the defragmented URL and the fragment. If
|
||||
the URL contained no fragments, the second element is the
|
||||
empty string.
|
||||
empty string or None if missing_as_none is True.
|
||||
"""
|
||||
url, _coerce_result = _coerce_args(url)
|
||||
if '#' in url:
|
||||
s, n, p, q, frag = _urlsplit(url)
|
||||
defrag = _urlunsplit(s, n, p, q, None)
|
||||
else:
|
||||
frag = ''
|
||||
frag = None
|
||||
defrag = url
|
||||
return _coerce_result(DefragResult(defrag, frag or ''))
|
||||
if not missing_as_none and frag is None: frag = ''
|
||||
result = _coerce_result(DefragResult(defrag, frag))
|
||||
result._keep_empty = missing_as_none
|
||||
return result
|
||||
|
||||
_hexdig = '0123456789ABCDEFabcdef'
|
||||
_hextobyte = None
|
||||
|
||||
@@ -1,21 +1,21 @@
|
||||
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
|
||||
index d6c83a75c1c..19ed4e01091 100644
|
||||
index 49a292df934..e1669a0c9b2 100644
|
||||
--- a/Lib/test/test_urlparse.py
|
||||
+++ b/Lib/test/test_urlparse.py
|
||||
@@ -237,11 +237,6 @@ def test_roundtrips(self):
|
||||
'','',''),
|
||||
@@ -282,11 +282,6 @@ def test_qs(self, orig, expect):
|
||||
None,None,None),
|
||||
('git+ssh', 'git@github.com','/user/project.git',
|
||||
'', '')),
|
||||
None, None)),
|
||||
- ('itms-services://?action=download-manifest&url=https://example.com/app',
|
||||
- ('itms-services', '', '', '',
|
||||
- 'action=download-manifest&url=https://example.com/app', ''),
|
||||
- ('itms-services', '', '', None,
|
||||
- 'action=download-manifest&url=https://example.com/app', None),
|
||||
- ('itms-services', '', '',
|
||||
- 'action=download-manifest&url=https://example.com/app', '')),
|
||||
- 'action=download-manifest&url=https://example.com/app', None)),
|
||||
('+scheme:path/to/file',
|
||||
('', '', '+scheme:path/to/file', '', '', ''),
|
||||
('', '', '+scheme:path/to/file', '', '')),
|
||||
(None, None, '+scheme:path/to/file', None, None, None),
|
||||
(None, None, '+scheme:path/to/file', None, None)),
|
||||
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
|
||||
index 8f724f907d4..148caf742c9 100644
|
||||
index e917f8b61bb..8575172573f 100644
|
||||
--- a/Lib/urllib/parse.py
|
||||
+++ b/Lib/urllib/parse.py
|
||||
@@ -59,7 +59,7 @@
|
||||
|
||||
@@ -0,0 +1,6 @@
|
||||
Add the *missing_as_none* parameter to :func:`~urllib.parse.urlparse`,
|
||||
:func:`~urllib.parse.urlsplit` and :func:`~urllib.parse.urldefrag`
|
||||
functions. Add the *keep_empty* parameter to
|
||||
:func:`~urllib.parse.urlunparse` and :func:`~urllib.parse.urlunsplit`
|
||||
functions. This allows to distinguish between empty and not defined URI
|
||||
components and preserve empty components.
|
||||
Reference in New Issue
Block a user