This repository was archived by the owner on Oct 2, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy path00320-CVE-2019-9636-and-CVE-2019-10160.patch
137 lines (124 loc) · 5.62 KB
/
00320-CVE-2019-9636-and-CVE-2019-10160.patch
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst
index d991254..647af61 100644
--- a/Doc/library/urllib.parse.rst
+++ b/Doc/library/urllib.parse.rst
@@ -121,6 +121,11 @@ or on combining URL components into a URL string.
Unmatched square brackets in the :attr:`netloc` attribute will raise a
:exc:`ValueError`.
+ Characters in the :attr:`netloc` attribute that decompose under NFKC
+ normalization (as used by the IDNA encoding) into any of ``/``, ``?``,
+ ``#``, ``@``, or ``:`` will raise a :exc:`ValueError`. If the URL is
+ decomposed before parsing, no error will be raised.
+
.. versionchanged:: 3.2
Added IPv6 URL parsing capabilities.
@@ -133,6 +138,10 @@ or on combining URL components into a URL string.
Out-of-range port numbers now raise :exc:`ValueError`, instead of
returning :const:`None`.
+ .. versionchanged:: 3.6.9
+ Characters that affect netloc parsing under NFKC normalization will
+ now raise :exc:`ValueError`.
+
.. function:: parse_qs(qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace', max_num_fields=None)
@@ -256,10 +265,19 @@ or on combining URL components into a URL string.
Unmatched square brackets in the :attr:`netloc` attribute will raise a
:exc:`ValueError`.
+ Characters in the :attr:`netloc` attribute that decompose under NFKC
+ normalization (as used by the IDNA encoding) into any of ``/``, ``?``,
+ ``#``, ``@``, or ``:`` will raise a :exc:`ValueError`. If the URL is
+ decomposed before parsing, no error will be raised.
+
.. versionchanged:: 3.6
Out-of-range port numbers now raise :exc:`ValueError`, instead of
returning :const:`None`.
+ .. versionchanged:: 3.6.9
+ Characters that affect netloc parsing under NFKC normalization will
+ now raise :exc:`ValueError`.
+
.. function:: urlunsplit(parts)
diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
index be50b47..68f633c 100644
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -1,3 +1,5 @@
+import sys
+import unicodedata
import unittest
import urllib.parse
@@ -984,6 +986,34 @@ class UrlParseTestCase(unittest.TestCase):
expected.append(name)
self.assertCountEqual(urllib.parse.__all__, expected)
+ def test_urlsplit_normalization(self):
+ # Certain characters should never occur in the netloc,
+ # including under normalization.
+ # Ensure that ALL of them are detected and cause an error
+ illegal_chars = '/:#?@'
+ hex_chars = {'{:04X}'.format(ord(c)) for c in illegal_chars}
+ denorm_chars = [
+ c for c in map(chr, range(128, sys.maxunicode))
+ if (hex_chars & set(unicodedata.decomposition(c).split()))
+ and c not in illegal_chars
+ ]
+ # Sanity check that we found at least one such character
+ self.assertIn('\u2100', denorm_chars)
+ self.assertIn('\uFF03', denorm_chars)
+
+ # bpo-36742: Verify port separators are ignored when they
+ # existed prior to decomposition
+ urllib.parse.urlsplit('http://\u30d5\u309a:80')
+ with self.assertRaises(ValueError):
+ urllib.parse.urlsplit('http://\u30d5\u309a\ufe1380')
+
+ for scheme in ["http", "https", "ftp"]:
+ for netloc in ["netloc{}false.netloc", "n{}user@netloc"]:
+ for c in denorm_chars:
+ url = "{}://{}/path".format(scheme, netloc.format(c))
+ with self.subTest(url=url, char='{:04X}'.format(ord(c))):
+ with self.assertRaises(ValueError):
+ urllib.parse.urlsplit(url)
class Utility_Tests(unittest.TestCase):
"""Testcase to test the various utility functions in the urllib."""
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
index 85e68c8..fa8827a 100644
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -391,6 +391,24 @@ def _splitnetloc(url, start=0):
delim = min(delim, wdelim) # use earliest delim position
return url[start:delim], url[delim:] # return (domain, rest)
+def _checknetloc(netloc):
+ if not netloc or not any(ord(c) > 127 for c in netloc):
+ return
+ # looking for characters like \u2100 that expand to 'a/c'
+ # IDNA uses NFKC equivalence, so normalize for this check
+ import unicodedata
+ n = netloc.replace('@', '') # ignore characters already included
+ n = n.replace(':', '') # but not the surrounding text
+ n = n.replace('#', '')
+ n = n.replace('?', '')
+ netloc2 = unicodedata.normalize('NFKC', n)
+ if n == netloc2:
+ return
+ for c in '/?#@:':
+ if c in netloc2:
+ raise ValueError("netloc '" + netloc + "' contains invalid " +
+ "characters under NFKC normalization")
+
def urlsplit(url, scheme='', allow_fragments=True):
"""Parse a URL into 5 components:
<scheme>://<netloc>/<path>?<query>#<fragment>
@@ -420,6 +438,7 @@ def urlsplit(url, scheme='', allow_fragments=True):
url, fragment = url.split('#', 1)
if '?' in url:
url, query = url.split('?', 1)
+ _checknetloc(netloc)
v = SplitResult(scheme, netloc, url, query, fragment)
_parse_cache[key] = v
return _coerce_result(v)
@@ -443,6 +462,7 @@ def urlsplit(url, scheme='', allow_fragments=True):
url, fragment = url.split('#', 1)
if '?' in url:
url, query = url.split('?', 1)
+ _checknetloc(netloc)
v = SplitResult(scheme, netloc, url, query, fragment)
_parse_cache[key] = v
return _coerce_result(v)