From b1043b467598e486e718a4232d15ef1de11d50df Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Wed, 10 May 2017 21:25:03 +0200 Subject: [PATCH 01/25] #1134 Support X-Forwarded-* and Forwarded implicitly --- aiohttp/hdrs.py | 3 +++ aiohttp/web_request.py | 48 +++++++++++++++++++++++++++++++++--------- 2 files changed, 41 insertions(+), 10 deletions(-) diff --git a/aiohttp/hdrs.py b/aiohttp/hdrs.py index f994319e48d..48c633903df 100644 --- a/aiohttp/hdrs.py +++ b/aiohttp/hdrs.py @@ -50,6 +50,7 @@ ETAG = istr('ETAG') EXPECT = istr('EXPECT') EXPIRES = istr('EXPIRES') +FORWARDED = istr('FORWARDED') FROM = istr('FROM') HOST = istr('HOST') IF_MATCH = istr('IF-MATCH') @@ -89,3 +90,5 @@ WANT_DIGEST = istr('WANT-DIGEST') WARNING = istr('WARNING') WWW_AUTHENTICATE = istr('WWW-AUTHENTICATE') +X_FORWARDED_HOST = istr('X-FORWARDED-HOST') +X_FORWARDED_PROTO = istr('X-FORWARDED-PROTO') diff --git a/aiohttp/web_request.py b/aiohttp/web_request.py index 204c7b5a258..0c393756446 100644 --- a/aiohttp/web_request.py +++ b/aiohttp/web_request.py @@ -150,16 +150,31 @@ def secure(self): """ return self.url.scheme == 'https' + @reify + def _forwarded(self): + forwarded = self._message.headers.get(hdrs.FORWARDED) + if forwarded is not None: + parsed = re.findall( + r'^by=([^;]*); +for=([^;]*); +host=([^;]*); +proto=(https?)$', + forwarded) + if parsed: + return parsed[0] + return None + @reify def _scheme(self): + proto = 'http' if self._transport.get_extra_info('sslcontext'): - return 'https' - secure_proxy_ssl_header = self._secure_proxy_ssl_header - if secure_proxy_ssl_header is not None: - header, value = secure_proxy_ssl_header + proto = 'https' + elif self._secure_proxy_ssl_header is not None: + header, value = self._secure_proxy_ssl_header if self.headers.get(header) == value: - return 'https' - return 'http' + proto = 'https' + elif self._forwarded: + _, _, _, proto = self._forwarded + elif hdrs.X_FORWARDED_PROTO in self._message.headers: + proto = self._message.headers[hdrs.X_FORWARDED_PROTO] + return proto @property def method(self): @@ -179,16 +194,29 @@ def version(self): @reify def host(self): - """Read only property for getting *HOST* header of request. + """ Hostname of the request. + + Hostname is resolved through the following headers, in this order: + + - Forwarded + - X-Forwarded-Host + - Host - Returns str or None if HTTP request has no HOST header. + Returns str, or None if no hostname is found in the headers. """ - return self._message.headers.get(hdrs.HOST) + host = None + if self._forwarded: + _, _, host, _ = self._forwarded + elif hdrs.X_FORWARDED_HOST in self._message.headers: + host = self._message.headers[hdrs.X_FORWARDED_HOST] + elif hdrs.HOST in self._message.headers: + host = self._message.headers[hdrs.HOST] + return host @reify def url(self): return URL('{}://{}{}'.format(self._scheme, - self._message.headers.get(hdrs.HOST), + self.host, str(self._rel_url))) @property From 85fcdfc4978ec690a0bd3b0b5fbcd6d3bbf365fe Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Thu, 11 May 2017 11:38:26 +0200 Subject: [PATCH 02/25] #1134 tests --- tests/test_web_request.py | 56 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/tests/test_web_request.py b/tests/test_web_request.py index 2f3fbe4462f..0302dfa3e9b 100644 --- a/tests/test_web_request.py +++ b/tests/test_web_request.py @@ -248,6 +248,62 @@ def test_https_scheme_by_secure_proxy_ssl_header_false_test(make_request): assert req.secure is False +def test_https_scheme_by_forwarded_header(make_request): + req = make_request('GET', '/', + headers=CIMultiDict( + {'Forwarded': 'by=; for=; host=; proto=https'})) + assert "https" == req.scheme + assert req.secure is True + + +def test_https_scheme_by_malformed_forwarded_header(make_request): + req = make_request('GET', '/', + headers=CIMultiDict({'Forwarded': 'malformed value'})) + assert "http" == req.scheme + assert req.secure is False + + +def test_https_scheme_by_x_forwarded_proto_header(make_request): + req = make_request('GET', '/', + headers=CIMultiDict({'X-Forwarded-Proto': 'https'})) + assert "https" == req.scheme + assert req.secure is True + + +def test_https_scheme_by_x_forwarded_proto_header_no_tls(make_request): + req = make_request('GET', '/', + headers=CIMultiDict({'X-Forwarded-Proto': 'http'})) + assert "http" == req.scheme + assert req.secure is False + + +def test_host_by_forwarded_header(make_request): + req = make_request('GET', '/', + headers=CIMultiDict( + {'Forwarded': 'by=; for=; host' + '=example.com; proto=https'})) + assert req.host == 'example.com' + + +def test_host_by_forwarded_header_malformed(make_request): + req = make_request('GET', '/', + headers=CIMultiDict({'Forwarded': 'malformed value'})) + assert req.host is None + + +def test_host_by_x_forwarded_host_header(make_request): + req = make_request('GET', '/', + headers=CIMultiDict( + {'X-Forwarded-Host': 'example.com'})) + assert req.host == 'example.com' + + +def test_host_by_host_header(make_request): + req = make_request('GET', '/', + headers=CIMultiDict({'Host': 'example.com'})) + assert req.host == 'example.com' + + def test_raw_headers(make_request): req = make_request('GET', '/', headers=CIMultiDict({'X-HEADER': 'aaa'})) From be95cb4f4cb883eaf7732b4f6465379ae89d5c80 Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Thu, 11 May 2017 22:13:13 +0200 Subject: [PATCH 03/25] added myself to contributors --- CONTRIBUTORS.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index c6bfe84e2d1..3b5a2d3ec9a 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -61,6 +61,7 @@ Enrique Saez Erich Healy Eugene Chernyshov Eugene Naydenov +Evert Lammerts Frederik Gladhorn Frederik Peter Aalund Gabriel Tremblay From 9554ce098ff61ae5961cbbeeb536b166ce01f879 Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Thu, 11 May 2017 22:17:13 +0200 Subject: [PATCH 04/25] added line to changes.rst --- CHANGES.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index 7283d2b4cd8..ca41036e332 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -31,6 +31,8 @@ Changes - Do not unquote `+` in match_info values #1816 +- Use Forwarded, X-Forwarded-Scheme and X-Forwarded-Host for better scheme and host resolution. #1134 + 2.0.7 (2017-04-12) ------------------ From 76b16247d20f8e2a2a22a5ef98806e9d45609828 Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Sun, 14 May 2017 08:05:57 +0200 Subject: [PATCH 05/25] Update CHANGES.rst --- CHANGES.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 01cb586d32d..0f0f0c8eb9e 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -41,7 +41,8 @@ Changes - Do not unquote `+` in match_info values #1816 -- Use Forwarded, X-Forwarded-Scheme and X-Forwarded-Host for better scheme and host resolution. #1134 +- Use Forwarded, X-Forwarded-Scheme and X-Forwarded-Host for better scheme and +host resolution. #1134 - Fix sub-application middlewares resolution order #1853 From 1f79171fd922dc0a1b7c3c2bc4df240c420bb205 Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Sun, 14 May 2017 08:16:57 +0200 Subject: [PATCH 06/25] Update CHANGES.rst --- CHANGES.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 0f0f0c8eb9e..89ba74e2a00 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -42,7 +42,7 @@ Changes - Do not unquote `+` in match_info values #1816 - Use Forwarded, X-Forwarded-Scheme and X-Forwarded-Host for better scheme and -host resolution. #1134 + host resolution. #1134 - Fix sub-application middlewares resolution order #1853 From c2848eda1f24a24ed78a10703198f9a8ffb88703 Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Sun, 14 May 2017 20:53:14 +0200 Subject: [PATCH 07/25] deprecating secure_proxy_ssl_header --- aiohttp/web.py | 4 ++++ docs/web_reference.rst | 22 ++++++++++++---------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/aiohttp/web.py b/aiohttp/web.py index f117fbc4cea..6f3d3dde1ca 100644 --- a/aiohttp/web.py +++ b/aiohttp/web.py @@ -58,6 +58,10 @@ def __init__(self, *, if loop is not None: warnings.warn("loop argument is deprecated", ResourceWarning) + if secure_proxy_ssl_header is not None: + warnings.warn( + "secure_proxy_ssl_header is deprecated", ResourceWarning) + self._debug = debug self._router = router self._secure_proxy_ssl_header = secure_proxy_ssl_header diff --git a/docs/web_reference.rst b/docs/web_reference.rst index a2b33453a18..bd9b5e9d87d 100644 --- a/docs/web_reference.rst +++ b/docs/web_reference.rst @@ -66,8 +66,10 @@ and :ref:`aiohttp-web-signals` handlers. A string representing the scheme of the request. - The scheme is ``'https'`` if transport for request handling is - *SSL* or ``secure_proxy_ssl_header`` is matching. + The scheme is ``'https'`` if transport for request handling is *SSL*, if + ``secure_proxy_ssl_header`` is matching (deprecated), if the ``proto`` + portion of a ``Forward`` header is present and contains ``https``, or if + an ``X-Forwarded-Proto`` header is present and contains ``https``. ``'http'`` otherwise. @@ -81,9 +83,7 @@ and :ref:`aiohttp-web-signals` handlers. .. attribute:: secure - A boolean indicating if transport for request handling is - *SSL* or ``secure_proxy_ssl_header`` is matching, - e.g. if ``request.url.scheme == 'https'`` + Shorthand for ``request.url.scheme == 'https'`` Read-only :class:`bool` property. @@ -1049,7 +1049,7 @@ WebSocketResponse .. seealso:: :ref:`WebSockets handling` - + WebSocketReady ^^^^^^^^^^^^^^ @@ -1233,11 +1233,13 @@ duplicated like one using :meth:`Application.copy`. If param is ``None`` :func:`asyncio.get_event_loop` used for getting default event loop. - :param tuple secure_proxy_ssl_header: Secure proxy SSL header. Can - be used to detect request scheme, - e.g. ``secure_proxy_ssl_header=('X-Forwarded-Proto', 'https')``. + :param tuple secure_proxy_ssl_header: Default: ``None``. + + .. deprecated:: 2.1 + + See ``request.url.scheme`` for built-in resolution of the current + scheme using the standard and de-facto standard headers. - Default: ``None``. :param bool tcp_keepalive: Enable TCP Keep-Alive. Default: ``True``. :param int keepalive_timeout: Number of seconds before closing Keep-Alive connection. Default: ``75`` seconds (NGINX's default value). From 73d4121f4a93cdd1c6d5d13a7c7978dde9e99dd0 Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Sat, 20 May 2017 23:49:13 +0200 Subject: [PATCH 08/25] more extensive Forwarded parsing --- aiohttp/web_request.py | 86 +++++++++++++++++++++++++++++++++------ tests/test_web_request.py | 41 +++++++++++++++++++ 2 files changed, 114 insertions(+), 13 deletions(-) diff --git a/aiohttp/web_request.py b/aiohttp/web_request.py index 0c393756446..04dc61ba1cc 100644 --- a/aiohttp/web_request.py +++ b/aiohttp/web_request.py @@ -3,6 +3,7 @@ import datetime import json import re +import string import tempfile import warnings from email.utils import parsedate @@ -21,6 +22,35 @@ FileField = collections.namedtuple( 'Field', 'name filename file content_type headers') +_TCHAR = string.digits + string.ascii_letters + r"!#$%&'*+\-.^_`|~" +# notice the escape of '-' to prevent interpretation as range + +_TOKEN = r'[{tchar}]*'.format(tchar=_TCHAR) + +_QDTEXT = r'[{}]'.format( + r''.join(chr(c) for c in (0x09, 0x20, 0x21, *range(0x23, 0x7F)))) +# qdtext includes 0x5C to escape 0x5D ('\]') +# qdtext excludes obs-text (because obsoleted, and encoding not specified) + +_QUOTED_PAIR = r'\\[\t {tchar}]'.format(tchar=_TCHAR) + +_QUOTED_STRING = r'"(?:{quoted_pair}|{qdtext})*"'.format( + qdtext=_QDTEXT, quoted_pair=_QUOTED_PAIR) + +_FORWARDED_PARAMS = ( + r'[bB][yY]|[fF][oO][rR]|[hH][oO][sS][tT]|[pP][rR][oO][tT][oO]') + +_FORWARDED_PAIR = ( + r'^ *({forwarded_params})=({token}|{quoted_string}) *$'.format( + forwarded_params=_FORWARDED_PARAMS, + token=_TOKEN, + quoted_string=_QUOTED_STRING)) +# allow whitespace as specified in RFC 7239 section 7.1 + +_QUOTED_PAIR_REPLACE_RE = re.compile(r'\\([\t {tchar}])'.format(tchar=_TCHAR)) +# same pattern as _QUOTED_PAIR but contains a capture group + +_FORWARDED_PAIR_RE = re.compile(_FORWARDED_PAIR) ############################################################ # HTTP Request @@ -151,15 +181,45 @@ def secure(self): return self.url.scheme == 'https' @reify - def _forwarded(self): - forwarded = self._message.headers.get(hdrs.FORWARDED) - if forwarded is not None: - parsed = re.findall( - r'^by=([^;]*); +for=([^;]*); +host=([^;]*); +proto=(https?)$', - forwarded) - if parsed: - return parsed[0] - return None + def forwarded(self): + """ A frozendict containing parsed Forwarded header(s). + + Makes an effort to parse Forwarded headers as specified by RFC 7239: + + - It adds all parameters (by, for, host, proto) in the order it finds + them; starting at the topmost / first added 'Forwarded' header, at + the leftmost / first-added parwameter. + - It checks that the value has valid syntax in general as specified in + section 4: either a 'token' or a 'quoted-string'. + - It un-escapes found escape sequences. + - It does NOT validate 'by' and 'for' contents as specified in section + 6. + - It does NOT validate 'host' contents (Host ABNF). + - It does NOT validate 'proto' contents for valid URI scheme names. + + Returns a dict(by=tuple(...), for=tuple(...), host=tuple(...), + proto=tuple(...), ) + """ + params = {'by': [], 'for': [], 'host': [], 'proto': []} + if hdrs.FORWARDED in self._message.headers: + for forwarded_elm in self._message.headers.getall(hdrs.FORWARDED): + if len(forwarded_elm): + forwarded_pairs = tuple( + _FORWARDED_PAIR_RE.findall(pair) + for pair in forwarded_elm.split(';')) + for forwarded_pair in forwarded_pairs: + if len(forwarded_pair) != 1: + # non-compliant syntax, ignore + continue + param = forwarded_pair[0][0].lower() + value = forwarded_pair[0][1] + if len(value) and value[0] == '"': + # quoted string: replace quotes and escape + # sequences + value = _QUOTED_PAIR_REPLACE_RE.sub( + r'\1', value[1:-1]) + params[param].append(value) + return params @reify def _scheme(self): @@ -170,8 +230,8 @@ def _scheme(self): header, value = self._secure_proxy_ssl_header if self.headers.get(header) == value: proto = 'https' - elif self._forwarded: - _, _, _, proto = self._forwarded + elif self.forwarded['proto']: + proto = self.forwarded['proto'][0] elif hdrs.X_FORWARDED_PROTO in self._message.headers: proto = self._message.headers[hdrs.X_FORWARDED_PROTO] return proto @@ -205,8 +265,8 @@ def host(self): Returns str, or None if no hostname is found in the headers. """ host = None - if self._forwarded: - _, _, host, _ = self._forwarded + if self.forwarded['host']: + host = self.forwarded['host'][0] elif hdrs.X_FORWARDED_HOST in self._message.headers: host = self._message.headers[hdrs.X_FORWARDED_HOST] elif hdrs.HOST in self._message.headers: diff --git a/tests/test_web_request.py b/tests/test_web_request.py index 0302dfa3e9b..b51f23d94eb 100644 --- a/tests/test_web_request.py +++ b/tests/test_web_request.py @@ -248,6 +248,47 @@ def test_https_scheme_by_secure_proxy_ssl_header_false_test(make_request): assert req.secure is False +def test_single_forwarded_header(make_request): + header = 'by=identifier; for=identifier; host=identifier; proto=identifier' + req = make_request('GET', '/', headers=CIMultiDict({'Forwarded': header})) + assert req.forwarded['by'] == ['identifier',] + assert req.forwarded['for'] == ['identifier',] + assert req.forwarded['host'] == ['identifier',] + assert req.forwarded['proto'] == ['identifier',] + +def test_single_forwarded_header_camelcase(make_request): + header = 'bY=identifier; fOr=identifier; HOst=identifier; pRoTO=identifier' + req = make_request('GET', '/', headers=CIMultiDict({'Forwarded': header})) + assert req.forwarded['by'] == ['identifier',] + assert req.forwarded['for'] == ['identifier',] + assert req.forwarded['host'] == ['identifier',] + assert req.forwarded['proto'] == ['identifier',] + +def test_single_forwarded_header_single_param(make_request): + header = 'BY=identifier' + req = make_request('GET', '/', headers=CIMultiDict({'Forwarded': header})) + assert req.forwarded['by'] == ['identifier',] + +def test_single_forwarded_header_multiple_param(make_request): + header = 'By=identifier1;BY=identifier2; By=identifier3; BY=identifier4' + req = make_request('GET', '/', headers=CIMultiDict({'Forwarded': header})) + assert req.forwarded['by'] == ['identifier1', 'identifier2', 'identifier3', + 'identifier4'] + +def test_single_forwarded_header_quoted_escaped(make_request): + header = 'Proto=identifier; pROTO="\lala lan\d\~ 123\!&"' + req = make_request('GET', '/', headers=CIMultiDict({'Forwarded': header})) + assert req.forwarded['proto'] == ['identifier', 'lala land~ 123!&'] + +def test_multiple_forwarded_headers(make_request): + headers = CIMultiDict() + headers.add('Forwarded', 'By=identifier1;BY=identifier2') + headers.add('Forwarded', 'By=identifier3; BY=identifier4') + req = make_request('GET', '/', headers=headers) + assert req.forwarded['by'] == ['identifier1', 'identifier2', 'identifier3', + 'identifier4'] + + def test_https_scheme_by_forwarded_header(make_request): req = make_request('GET', '/', headers=CIMultiDict( From 0cb4b22035a14d37893bcdd2ddb688ab37f772f6 Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Sun, 21 May 2017 08:46:22 +0200 Subject: [PATCH 09/25] flake --- aiohttp/web_request.py | 2 +- tests/test_web_request.py | 23 ++++++++++++++--------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/aiohttp/web_request.py b/aiohttp/web_request.py index 04dc61ba1cc..fa4fadab71c 100644 --- a/aiohttp/web_request.py +++ b/aiohttp/web_request.py @@ -198,7 +198,7 @@ def forwarded(self): - It does NOT validate 'proto' contents for valid URI scheme names. Returns a dict(by=tuple(...), for=tuple(...), host=tuple(...), - proto=tuple(...), ) + proto=tuple(...), ) """ params = {'by': [], 'for': [], 'host': [], 'proto': []} if hdrs.FORWARDED in self._message.headers: diff --git a/tests/test_web_request.py b/tests/test_web_request.py index b51f23d94eb..57a846a65aa 100644 --- a/tests/test_web_request.py +++ b/tests/test_web_request.py @@ -251,23 +251,26 @@ def test_https_scheme_by_secure_proxy_ssl_header_false_test(make_request): def test_single_forwarded_header(make_request): header = 'by=identifier; for=identifier; host=identifier; proto=identifier' req = make_request('GET', '/', headers=CIMultiDict({'Forwarded': header})) - assert req.forwarded['by'] == ['identifier',] - assert req.forwarded['for'] == ['identifier',] - assert req.forwarded['host'] == ['identifier',] - assert req.forwarded['proto'] == ['identifier',] + assert req.forwarded['by'] == ['identifier'] + assert req.forwarded['for'] == ['identifier'] + assert req.forwarded['host'] == ['identifier'] + assert req.forwarded['proto'] == ['identifier'] + def test_single_forwarded_header_camelcase(make_request): header = 'bY=identifier; fOr=identifier; HOst=identifier; pRoTO=identifier' req = make_request('GET', '/', headers=CIMultiDict({'Forwarded': header})) - assert req.forwarded['by'] == ['identifier',] - assert req.forwarded['for'] == ['identifier',] - assert req.forwarded['host'] == ['identifier',] - assert req.forwarded['proto'] == ['identifier',] + assert req.forwarded['by'] == ['identifier'] + assert req.forwarded['for'] == ['identifier'] + assert req.forwarded['host'] == ['identifier'] + assert req.forwarded['proto'] == ['identifier'] + def test_single_forwarded_header_single_param(make_request): header = 'BY=identifier' req = make_request('GET', '/', headers=CIMultiDict({'Forwarded': header})) - assert req.forwarded['by'] == ['identifier',] + assert req.forwarded['by'] == ['identifier'] + def test_single_forwarded_header_multiple_param(make_request): header = 'By=identifier1;BY=identifier2; By=identifier3; BY=identifier4' @@ -275,11 +278,13 @@ def test_single_forwarded_header_multiple_param(make_request): assert req.forwarded['by'] == ['identifier1', 'identifier2', 'identifier3', 'identifier4'] + def test_single_forwarded_header_quoted_escaped(make_request): header = 'Proto=identifier; pROTO="\lala lan\d\~ 123\!&"' req = make_request('GET', '/', headers=CIMultiDict({'Forwarded': header})) assert req.forwarded['proto'] == ['identifier', 'lala land~ 123!&'] + def test_multiple_forwarded_headers(make_request): headers = CIMultiDict() headers.add('Forwarded', 'By=identifier1;BY=identifier2') From 2b33c7d47b25c9ec41f068ce2e3be23a510ea0e8 Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Sun, 21 May 2017 13:10:33 +0200 Subject: [PATCH 10/25] small changes --- aiohttp/web_request.py | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/aiohttp/web_request.py b/aiohttp/web_request.py index fa4fadab71c..a3b64a81170 100644 --- a/aiohttp/web_request.py +++ b/aiohttp/web_request.py @@ -200,26 +200,23 @@ def forwarded(self): Returns a dict(by=tuple(...), for=tuple(...), host=tuple(...), proto=tuple(...), ) """ - params = {'by': [], 'for': [], 'host': [], 'proto': []} - if hdrs.FORWARDED in self._message.headers: - for forwarded_elm in self._message.headers.getall(hdrs.FORWARDED): - if len(forwarded_elm): - forwarded_pairs = tuple( - _FORWARDED_PAIR_RE.findall(pair) - for pair in forwarded_elm.split(';')) - for forwarded_pair in forwarded_pairs: - if len(forwarded_pair) != 1: - # non-compliant syntax, ignore - continue - param = forwarded_pair[0][0].lower() - value = forwarded_pair[0][1] - if len(value) and value[0] == '"': - # quoted string: replace quotes and escape - # sequences - value = _QUOTED_PAIR_REPLACE_RE.sub( - r'\1', value[1:-1]) - params[param].append(value) - return params + params = MultiDict({'by': [], 'for': [], 'host': [], 'proto': []}) + for forwarded_elm in self._message.headers.getall(hdrs.FORWARDED, ()): + forwarded_pairs = (_FORWARDED_PAIR_RE.findall(pair) + for pair in forwarded_elm.split(';')) + for forwarded_pair in forwarded_pairs: + if len(forwarded_pair) != 1: + # non-compliant syntax, ignore + continue + param = forwarded_pair[0][0].lower() + value = forwarded_pair[0][1] + if value and value[0] == '"': + # quoted string: replace quotes and escape + # sequences + value = _QUOTED_PAIR_REPLACE_RE.sub( + r'\1', value[1:-1]) + params[param].append(value) + return MultiDictProxy(params) @reify def _scheme(self): From 15462c27b7b470f3a4b1ff8225f5647f54afd9ad Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Sun, 21 May 2017 13:17:44 +0200 Subject: [PATCH 11/25] unpacking tuple implicit --- aiohttp/web_request.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/aiohttp/web_request.py b/aiohttp/web_request.py index a3b64a81170..88d2cfac187 100644 --- a/aiohttp/web_request.py +++ b/aiohttp/web_request.py @@ -208,14 +208,13 @@ def forwarded(self): if len(forwarded_pair) != 1: # non-compliant syntax, ignore continue - param = forwarded_pair[0][0].lower() - value = forwarded_pair[0][1] + param, value = forwarded_pair[0] if value and value[0] == '"': # quoted string: replace quotes and escape # sequences value = _QUOTED_PAIR_REPLACE_RE.sub( r'\1', value[1:-1]) - params[param].append(value) + params[param.lower()].append(value) return MultiDictProxy(params) @reify From b672b1646302ebffefeb975a53a11ed17ca954cd Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Wed, 10 May 2017 21:25:03 +0200 Subject: [PATCH 12/25] #1134 Support X-Forwarded-* and Forwarded implicitly --- aiohttp/hdrs.py | 3 +++ aiohttp/web_request.py | 48 +++++++++++++++++++++++++++++++++--------- 2 files changed, 41 insertions(+), 10 deletions(-) diff --git a/aiohttp/hdrs.py b/aiohttp/hdrs.py index cbbc16a345f..0cb964136e5 100644 --- a/aiohttp/hdrs.py +++ b/aiohttp/hdrs.py @@ -51,6 +51,7 @@ ETAG = istr('ETAG') EXPECT = istr('EXPECT') EXPIRES = istr('EXPIRES') +FORWARDED = istr('FORWARDED') FROM = istr('FROM') HOST = istr('HOST') IF_MATCH = istr('IF-MATCH') @@ -90,3 +91,5 @@ WANT_DIGEST = istr('WANT-DIGEST') WARNING = istr('WARNING') WWW_AUTHENTICATE = istr('WWW-AUTHENTICATE') +X_FORWARDED_HOST = istr('X-FORWARDED-HOST') +X_FORWARDED_PROTO = istr('X-FORWARDED-PROTO') diff --git a/aiohttp/web_request.py b/aiohttp/web_request.py index eae340de1f6..955c762b330 100644 --- a/aiohttp/web_request.py +++ b/aiohttp/web_request.py @@ -151,16 +151,31 @@ def secure(self): """ return self.url.scheme == 'https' + @reify + def _forwarded(self): + forwarded = self._message.headers.get(hdrs.FORWARDED) + if forwarded is not None: + parsed = re.findall( + r'^by=([^;]*); +for=([^;]*); +host=([^;]*); +proto=(https?)$', + forwarded) + if parsed: + return parsed[0] + return None + @reify def _scheme(self): + proto = 'http' if self._transport.get_extra_info('sslcontext'): - return 'https' - secure_proxy_ssl_header = self._secure_proxy_ssl_header - if secure_proxy_ssl_header is not None: - header, value = secure_proxy_ssl_header + proto = 'https' + elif self._secure_proxy_ssl_header is not None: + header, value = self._secure_proxy_ssl_header if self.headers.get(header) == value: - return 'https' - return 'http' + proto = 'https' + elif self._forwarded: + _, _, _, proto = self._forwarded + elif hdrs.X_FORWARDED_PROTO in self._message.headers: + proto = self._message.headers[hdrs.X_FORWARDED_PROTO] + return proto @property def method(self): @@ -180,16 +195,29 @@ def version(self): @reify def host(self): - """Read only property for getting *HOST* header of request. + """ Hostname of the request. + + Hostname is resolved through the following headers, in this order: + + - Forwarded + - X-Forwarded-Host + - Host - Returns str or None if HTTP request has no HOST header. + Returns str, or None if no hostname is found in the headers. """ - return self._message.headers.get(hdrs.HOST) + host = None + if self._forwarded: + _, _, host, _ = self._forwarded + elif hdrs.X_FORWARDED_HOST in self._message.headers: + host = self._message.headers[hdrs.X_FORWARDED_HOST] + elif hdrs.HOST in self._message.headers: + host = self._message.headers[hdrs.HOST] + return host @reify def url(self): return URL('{}://{}{}'.format(self._scheme, - self._message.headers.get(hdrs.HOST), + self.host, str(self._rel_url))) @property From 105c9ed2b154164d7d770dcf68f66b6d73480c56 Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Thu, 11 May 2017 11:38:26 +0200 Subject: [PATCH 13/25] #1134 tests --- tests/test_web_request.py | 56 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/tests/test_web_request.py b/tests/test_web_request.py index 2f3fbe4462f..0302dfa3e9b 100644 --- a/tests/test_web_request.py +++ b/tests/test_web_request.py @@ -248,6 +248,62 @@ def test_https_scheme_by_secure_proxy_ssl_header_false_test(make_request): assert req.secure is False +def test_https_scheme_by_forwarded_header(make_request): + req = make_request('GET', '/', + headers=CIMultiDict( + {'Forwarded': 'by=; for=; host=; proto=https'})) + assert "https" == req.scheme + assert req.secure is True + + +def test_https_scheme_by_malformed_forwarded_header(make_request): + req = make_request('GET', '/', + headers=CIMultiDict({'Forwarded': 'malformed value'})) + assert "http" == req.scheme + assert req.secure is False + + +def test_https_scheme_by_x_forwarded_proto_header(make_request): + req = make_request('GET', '/', + headers=CIMultiDict({'X-Forwarded-Proto': 'https'})) + assert "https" == req.scheme + assert req.secure is True + + +def test_https_scheme_by_x_forwarded_proto_header_no_tls(make_request): + req = make_request('GET', '/', + headers=CIMultiDict({'X-Forwarded-Proto': 'http'})) + assert "http" == req.scheme + assert req.secure is False + + +def test_host_by_forwarded_header(make_request): + req = make_request('GET', '/', + headers=CIMultiDict( + {'Forwarded': 'by=; for=; host' + '=example.com; proto=https'})) + assert req.host == 'example.com' + + +def test_host_by_forwarded_header_malformed(make_request): + req = make_request('GET', '/', + headers=CIMultiDict({'Forwarded': 'malformed value'})) + assert req.host is None + + +def test_host_by_x_forwarded_host_header(make_request): + req = make_request('GET', '/', + headers=CIMultiDict( + {'X-Forwarded-Host': 'example.com'})) + assert req.host == 'example.com' + + +def test_host_by_host_header(make_request): + req = make_request('GET', '/', + headers=CIMultiDict({'Host': 'example.com'})) + assert req.host == 'example.com' + + def test_raw_headers(make_request): req = make_request('GET', '/', headers=CIMultiDict({'X-HEADER': 'aaa'})) From 145f31f5da7d23a8d27058d8506a4aaabaa9f362 Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Thu, 11 May 2017 22:13:13 +0200 Subject: [PATCH 14/25] added myself to contributors --- CONTRIBUTORS.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index bdd4977af01..e045278f88a 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -62,6 +62,7 @@ Enrique Saez Erich Healy Eugene Chernyshov Eugene Naydenov +Evert Lammerts Frederik Gladhorn Frederik Peter Aalund Gabriel Tremblay From b2a0cc5f6e37bc3618b0b0db79211e299ce8c3c2 Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Thu, 11 May 2017 22:17:13 +0200 Subject: [PATCH 15/25] added line to changes.rst --- CHANGES.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index 96f74b3767a..086b6546e52 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -52,6 +52,10 @@ Changes - Make test server more reliable #1896 +- Use Forwarded, X-Forwarded-Scheme and X-Forwarded-Host for better scheme and + host resolution. #1134 + + 2.0.7 (2017-04-12) ------------------ From 09cd9144f43c1ce9217b0b4985d21fb7c27c4814 Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Sun, 14 May 2017 20:53:14 +0200 Subject: [PATCH 16/25] deprecating secure_proxy_ssl_header --- aiohttp/web.py | 4 ++++ docs/web_reference.rst | 22 ++++++++++++---------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/aiohttp/web.py b/aiohttp/web.py index 89130761a6f..e23bd2d5bd3 100644 --- a/aiohttp/web.py +++ b/aiohttp/web.py @@ -59,6 +59,10 @@ def __init__(self, *, if loop is not None: warnings.warn("loop argument is deprecated", ResourceWarning) + if secure_proxy_ssl_header is not None: + warnings.warn( + "secure_proxy_ssl_header is deprecated", ResourceWarning) + self._debug = debug self._router = router self._secure_proxy_ssl_header = secure_proxy_ssl_header diff --git a/docs/web_reference.rst b/docs/web_reference.rst index 72f355e8b60..21988c6adb4 100644 --- a/docs/web_reference.rst +++ b/docs/web_reference.rst @@ -66,8 +66,10 @@ and :ref:`aiohttp-web-signals` handlers. A string representing the scheme of the request. - The scheme is ``'https'`` if transport for request handling is - *SSL* or ``secure_proxy_ssl_header`` is matching. + The scheme is ``'https'`` if transport for request handling is *SSL*, if + ``secure_proxy_ssl_header`` is matching (deprecated), if the ``proto`` + portion of a ``Forward`` header is present and contains ``https``, or if + an ``X-Forwarded-Proto`` header is present and contains ``https``. ``'http'`` otherwise. @@ -81,9 +83,7 @@ and :ref:`aiohttp-web-signals` handlers. .. attribute:: secure - A boolean indicating if transport for request handling is - *SSL* or ``secure_proxy_ssl_header`` is matching, - e.g. if ``request.url.scheme == 'https'`` + Shorthand for ``request.url.scheme == 'https'`` Read-only :class:`bool` property. @@ -1049,7 +1049,7 @@ WebSocketResponse .. seealso:: :ref:`WebSockets handling` - + WebSocketReady ^^^^^^^^^^^^^^ @@ -1233,11 +1233,13 @@ duplicated like one using :meth:`Application.copy`. If param is ``None`` :func:`asyncio.get_event_loop` used for getting default event loop. - :param tuple secure_proxy_ssl_header: Secure proxy SSL header. Can - be used to detect request scheme, - e.g. ``secure_proxy_ssl_header=('X-Forwarded-Proto', 'https')``. + :param tuple secure_proxy_ssl_header: Default: ``None``. + + .. deprecated:: 2.1 + + See ``request.url.scheme`` for built-in resolution of the current + scheme using the standard and de-facto standard headers. - Default: ``None``. :param bool tcp_keepalive: Enable TCP Keep-Alive. Default: ``True``. :param int keepalive_timeout: Number of seconds before closing Keep-Alive connection. Default: ``75`` seconds (NGINX's default value). From df470cd135422aa7d670e28a073a312712c1354b Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Sat, 20 May 2017 23:49:13 +0200 Subject: [PATCH 17/25] more extensive Forwarded parsing --- aiohttp/web_request.py | 86 +++++++++++++++++++++++++++++++++------ tests/test_web_request.py | 41 +++++++++++++++++++ 2 files changed, 114 insertions(+), 13 deletions(-) diff --git a/aiohttp/web_request.py b/aiohttp/web_request.py index 955c762b330..5d7a08e5a49 100644 --- a/aiohttp/web_request.py +++ b/aiohttp/web_request.py @@ -3,6 +3,7 @@ import datetime import json import re +import string import tempfile import warnings from email.utils import parsedate @@ -22,6 +23,35 @@ FileField = collections.namedtuple( 'Field', 'name filename file content_type headers') +_TCHAR = string.digits + string.ascii_letters + r"!#$%&'*+\-.^_`|~" +# notice the escape of '-' to prevent interpretation as range + +_TOKEN = r'[{tchar}]*'.format(tchar=_TCHAR) + +_QDTEXT = r'[{}]'.format( + r''.join(chr(c) for c in (0x09, 0x20, 0x21, *range(0x23, 0x7F)))) +# qdtext includes 0x5C to escape 0x5D ('\]') +# qdtext excludes obs-text (because obsoleted, and encoding not specified) + +_QUOTED_PAIR = r'\\[\t {tchar}]'.format(tchar=_TCHAR) + +_QUOTED_STRING = r'"(?:{quoted_pair}|{qdtext})*"'.format( + qdtext=_QDTEXT, quoted_pair=_QUOTED_PAIR) + +_FORWARDED_PARAMS = ( + r'[bB][yY]|[fF][oO][rR]|[hH][oO][sS][tT]|[pP][rR][oO][tT][oO]') + +_FORWARDED_PAIR = ( + r'^ *({forwarded_params})=({token}|{quoted_string}) *$'.format( + forwarded_params=_FORWARDED_PARAMS, + token=_TOKEN, + quoted_string=_QUOTED_STRING)) +# allow whitespace as specified in RFC 7239 section 7.1 + +_QUOTED_PAIR_REPLACE_RE = re.compile(r'\\([\t {tchar}])'.format(tchar=_TCHAR)) +# same pattern as _QUOTED_PAIR but contains a capture group + +_FORWARDED_PAIR_RE = re.compile(_FORWARDED_PAIR) ############################################################ # HTTP Request @@ -152,15 +182,45 @@ def secure(self): return self.url.scheme == 'https' @reify - def _forwarded(self): - forwarded = self._message.headers.get(hdrs.FORWARDED) - if forwarded is not None: - parsed = re.findall( - r'^by=([^;]*); +for=([^;]*); +host=([^;]*); +proto=(https?)$', - forwarded) - if parsed: - return parsed[0] - return None + def forwarded(self): + """ A frozendict containing parsed Forwarded header(s). + + Makes an effort to parse Forwarded headers as specified by RFC 7239: + + - It adds all parameters (by, for, host, proto) in the order it finds + them; starting at the topmost / first added 'Forwarded' header, at + the leftmost / first-added parwameter. + - It checks that the value has valid syntax in general as specified in + section 4: either a 'token' or a 'quoted-string'. + - It un-escapes found escape sequences. + - It does NOT validate 'by' and 'for' contents as specified in section + 6. + - It does NOT validate 'host' contents (Host ABNF). + - It does NOT validate 'proto' contents for valid URI scheme names. + + Returns a dict(by=tuple(...), for=tuple(...), host=tuple(...), + proto=tuple(...), ) + """ + params = {'by': [], 'for': [], 'host': [], 'proto': []} + if hdrs.FORWARDED in self._message.headers: + for forwarded_elm in self._message.headers.getall(hdrs.FORWARDED): + if len(forwarded_elm): + forwarded_pairs = tuple( + _FORWARDED_PAIR_RE.findall(pair) + for pair in forwarded_elm.split(';')) + for forwarded_pair in forwarded_pairs: + if len(forwarded_pair) != 1: + # non-compliant syntax, ignore + continue + param = forwarded_pair[0][0].lower() + value = forwarded_pair[0][1] + if len(value) and value[0] == '"': + # quoted string: replace quotes and escape + # sequences + value = _QUOTED_PAIR_REPLACE_RE.sub( + r'\1', value[1:-1]) + params[param].append(value) + return params @reify def _scheme(self): @@ -171,8 +231,8 @@ def _scheme(self): header, value = self._secure_proxy_ssl_header if self.headers.get(header) == value: proto = 'https' - elif self._forwarded: - _, _, _, proto = self._forwarded + elif self.forwarded['proto']: + proto = self.forwarded['proto'][0] elif hdrs.X_FORWARDED_PROTO in self._message.headers: proto = self._message.headers[hdrs.X_FORWARDED_PROTO] return proto @@ -206,8 +266,8 @@ def host(self): Returns str, or None if no hostname is found in the headers. """ host = None - if self._forwarded: - _, _, host, _ = self._forwarded + if self.forwarded['host']: + host = self.forwarded['host'][0] elif hdrs.X_FORWARDED_HOST in self._message.headers: host = self._message.headers[hdrs.X_FORWARDED_HOST] elif hdrs.HOST in self._message.headers: diff --git a/tests/test_web_request.py b/tests/test_web_request.py index 0302dfa3e9b..b51f23d94eb 100644 --- a/tests/test_web_request.py +++ b/tests/test_web_request.py @@ -248,6 +248,47 @@ def test_https_scheme_by_secure_proxy_ssl_header_false_test(make_request): assert req.secure is False +def test_single_forwarded_header(make_request): + header = 'by=identifier; for=identifier; host=identifier; proto=identifier' + req = make_request('GET', '/', headers=CIMultiDict({'Forwarded': header})) + assert req.forwarded['by'] == ['identifier',] + assert req.forwarded['for'] == ['identifier',] + assert req.forwarded['host'] == ['identifier',] + assert req.forwarded['proto'] == ['identifier',] + +def test_single_forwarded_header_camelcase(make_request): + header = 'bY=identifier; fOr=identifier; HOst=identifier; pRoTO=identifier' + req = make_request('GET', '/', headers=CIMultiDict({'Forwarded': header})) + assert req.forwarded['by'] == ['identifier',] + assert req.forwarded['for'] == ['identifier',] + assert req.forwarded['host'] == ['identifier',] + assert req.forwarded['proto'] == ['identifier',] + +def test_single_forwarded_header_single_param(make_request): + header = 'BY=identifier' + req = make_request('GET', '/', headers=CIMultiDict({'Forwarded': header})) + assert req.forwarded['by'] == ['identifier',] + +def test_single_forwarded_header_multiple_param(make_request): + header = 'By=identifier1;BY=identifier2; By=identifier3; BY=identifier4' + req = make_request('GET', '/', headers=CIMultiDict({'Forwarded': header})) + assert req.forwarded['by'] == ['identifier1', 'identifier2', 'identifier3', + 'identifier4'] + +def test_single_forwarded_header_quoted_escaped(make_request): + header = 'Proto=identifier; pROTO="\lala lan\d\~ 123\!&"' + req = make_request('GET', '/', headers=CIMultiDict({'Forwarded': header})) + assert req.forwarded['proto'] == ['identifier', 'lala land~ 123!&'] + +def test_multiple_forwarded_headers(make_request): + headers = CIMultiDict() + headers.add('Forwarded', 'By=identifier1;BY=identifier2') + headers.add('Forwarded', 'By=identifier3; BY=identifier4') + req = make_request('GET', '/', headers=headers) + assert req.forwarded['by'] == ['identifier1', 'identifier2', 'identifier3', + 'identifier4'] + + def test_https_scheme_by_forwarded_header(make_request): req = make_request('GET', '/', headers=CIMultiDict( From 5560bab4ba313323df74922f81c1258d039a4b05 Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Sun, 21 May 2017 08:46:22 +0200 Subject: [PATCH 18/25] flake --- aiohttp/web_request.py | 2 +- tests/test_web_request.py | 23 ++++++++++++++--------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/aiohttp/web_request.py b/aiohttp/web_request.py index 5d7a08e5a49..942eef7e19c 100644 --- a/aiohttp/web_request.py +++ b/aiohttp/web_request.py @@ -199,7 +199,7 @@ def forwarded(self): - It does NOT validate 'proto' contents for valid URI scheme names. Returns a dict(by=tuple(...), for=tuple(...), host=tuple(...), - proto=tuple(...), ) + proto=tuple(...), ) """ params = {'by': [], 'for': [], 'host': [], 'proto': []} if hdrs.FORWARDED in self._message.headers: diff --git a/tests/test_web_request.py b/tests/test_web_request.py index b51f23d94eb..57a846a65aa 100644 --- a/tests/test_web_request.py +++ b/tests/test_web_request.py @@ -251,23 +251,26 @@ def test_https_scheme_by_secure_proxy_ssl_header_false_test(make_request): def test_single_forwarded_header(make_request): header = 'by=identifier; for=identifier; host=identifier; proto=identifier' req = make_request('GET', '/', headers=CIMultiDict({'Forwarded': header})) - assert req.forwarded['by'] == ['identifier',] - assert req.forwarded['for'] == ['identifier',] - assert req.forwarded['host'] == ['identifier',] - assert req.forwarded['proto'] == ['identifier',] + assert req.forwarded['by'] == ['identifier'] + assert req.forwarded['for'] == ['identifier'] + assert req.forwarded['host'] == ['identifier'] + assert req.forwarded['proto'] == ['identifier'] + def test_single_forwarded_header_camelcase(make_request): header = 'bY=identifier; fOr=identifier; HOst=identifier; pRoTO=identifier' req = make_request('GET', '/', headers=CIMultiDict({'Forwarded': header})) - assert req.forwarded['by'] == ['identifier',] - assert req.forwarded['for'] == ['identifier',] - assert req.forwarded['host'] == ['identifier',] - assert req.forwarded['proto'] == ['identifier',] + assert req.forwarded['by'] == ['identifier'] + assert req.forwarded['for'] == ['identifier'] + assert req.forwarded['host'] == ['identifier'] + assert req.forwarded['proto'] == ['identifier'] + def test_single_forwarded_header_single_param(make_request): header = 'BY=identifier' req = make_request('GET', '/', headers=CIMultiDict({'Forwarded': header})) - assert req.forwarded['by'] == ['identifier',] + assert req.forwarded['by'] == ['identifier'] + def test_single_forwarded_header_multiple_param(make_request): header = 'By=identifier1;BY=identifier2; By=identifier3; BY=identifier4' @@ -275,11 +278,13 @@ def test_single_forwarded_header_multiple_param(make_request): assert req.forwarded['by'] == ['identifier1', 'identifier2', 'identifier3', 'identifier4'] + def test_single_forwarded_header_quoted_escaped(make_request): header = 'Proto=identifier; pROTO="\lala lan\d\~ 123\!&"' req = make_request('GET', '/', headers=CIMultiDict({'Forwarded': header})) assert req.forwarded['proto'] == ['identifier', 'lala land~ 123!&'] + def test_multiple_forwarded_headers(make_request): headers = CIMultiDict() headers.add('Forwarded', 'By=identifier1;BY=identifier2') From ef0c597d9fd39c1dbc4fc95506ecb3cccda52682 Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Sun, 21 May 2017 13:10:33 +0200 Subject: [PATCH 19/25] small changes --- aiohttp/web_request.py | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/aiohttp/web_request.py b/aiohttp/web_request.py index 942eef7e19c..0143eb6ac98 100644 --- a/aiohttp/web_request.py +++ b/aiohttp/web_request.py @@ -201,26 +201,23 @@ def forwarded(self): Returns a dict(by=tuple(...), for=tuple(...), host=tuple(...), proto=tuple(...), ) """ - params = {'by': [], 'for': [], 'host': [], 'proto': []} - if hdrs.FORWARDED in self._message.headers: - for forwarded_elm in self._message.headers.getall(hdrs.FORWARDED): - if len(forwarded_elm): - forwarded_pairs = tuple( - _FORWARDED_PAIR_RE.findall(pair) - for pair in forwarded_elm.split(';')) - for forwarded_pair in forwarded_pairs: - if len(forwarded_pair) != 1: - # non-compliant syntax, ignore - continue - param = forwarded_pair[0][0].lower() - value = forwarded_pair[0][1] - if len(value) and value[0] == '"': - # quoted string: replace quotes and escape - # sequences - value = _QUOTED_PAIR_REPLACE_RE.sub( - r'\1', value[1:-1]) - params[param].append(value) - return params + params = MultiDict({'by': [], 'for': [], 'host': [], 'proto': []}) + for forwarded_elm in self._message.headers.getall(hdrs.FORWARDED, ()): + forwarded_pairs = (_FORWARDED_PAIR_RE.findall(pair) + for pair in forwarded_elm.split(';')) + for forwarded_pair in forwarded_pairs: + if len(forwarded_pair) != 1: + # non-compliant syntax, ignore + continue + param = forwarded_pair[0][0].lower() + value = forwarded_pair[0][1] + if value and value[0] == '"': + # quoted string: replace quotes and escape + # sequences + value = _QUOTED_PAIR_REPLACE_RE.sub( + r'\1', value[1:-1]) + params[param].append(value) + return MultiDictProxy(params) @reify def _scheme(self): From cd643a754cc71a613ef77b2a6286ba6e1c7b910b Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Sun, 21 May 2017 13:17:44 +0200 Subject: [PATCH 20/25] unpacking tuple implicit --- aiohttp/web_request.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/aiohttp/web_request.py b/aiohttp/web_request.py index 0143eb6ac98..90a022d3c03 100644 --- a/aiohttp/web_request.py +++ b/aiohttp/web_request.py @@ -209,14 +209,13 @@ def forwarded(self): if len(forwarded_pair) != 1: # non-compliant syntax, ignore continue - param = forwarded_pair[0][0].lower() - value = forwarded_pair[0][1] + param, value = forwarded_pair[0] if value and value[0] == '"': # quoted string: replace quotes and escape # sequences value = _QUOTED_PAIR_REPLACE_RE.sub( r'\1', value[1:-1]) - params[param].append(value) + params[param.lower()].append(value) return MultiDictProxy(params) @reify From d1ba86bb553c2e60a092b87f54e3a42120cf37de Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Mon, 22 May 2017 16:47:24 +0200 Subject: [PATCH 21/25] handling multiple field-values, fixed quoted-pair bug (now using vchar instead of tchar), changed return type of forwarded --- aiohttp/web_request.py | 99 +++++++++++++++++++++++---------------- tests/test_web_request.py | 58 +++++++++++++---------- 2 files changed, 91 insertions(+), 66 deletions(-) diff --git a/aiohttp/web_request.py b/aiohttp/web_request.py index 90a022d3c03..b497b5a7f08 100644 --- a/aiohttp/web_request.py +++ b/aiohttp/web_request.py @@ -5,6 +5,7 @@ import re import string import tempfile +import types import warnings from email.utils import parsedate from types import MappingProxyType @@ -23,17 +24,17 @@ FileField = collections.namedtuple( 'Field', 'name filename file content_type headers') -_TCHAR = string.digits + string.ascii_letters + r"!#$%&'*+\-.^_`|~" -# notice the escape of '-' to prevent interpretation as range +_TCHAR = string.digits + string.ascii_letters + r"!#$%&'*+.^_`|~-" +# '-' at the end to prevent interpretation as range in a char class _TOKEN = r'[{tchar}]*'.format(tchar=_TCHAR) _QDTEXT = r'[{}]'.format( - r''.join(chr(c) for c in (0x09, 0x20, 0x21, *range(0x23, 0x7F)))) + r''.join(chr(c) for c in (0x09, 0x20, 0x21) + tuple(range(0x23, 0x7F)))) # qdtext includes 0x5C to escape 0x5D ('\]') # qdtext excludes obs-text (because obsoleted, and encoding not specified) -_QUOTED_PAIR = r'\\[\t {tchar}]'.format(tchar=_TCHAR) +_QUOTED_PAIR = r'\\[\t !-~]' _QUOTED_STRING = r'"(?:{quoted_pair}|{qdtext})*"'.format( qdtext=_QDTEXT, quoted_pair=_QUOTED_PAIR) @@ -42,13 +43,12 @@ r'[bB][yY]|[fF][oO][rR]|[hH][oO][sS][tT]|[pP][rR][oO][tT][oO]') _FORWARDED_PAIR = ( - r'^ *({forwarded_params})=({token}|{quoted_string}) *$'.format( + r'^({forwarded_params})=({token}|{quoted_string})$'.format( forwarded_params=_FORWARDED_PARAMS, token=_TOKEN, quoted_string=_QUOTED_STRING)) -# allow whitespace as specified in RFC 7239 section 7.1 -_QUOTED_PAIR_REPLACE_RE = re.compile(r'\\([\t {tchar}])'.format(tchar=_TCHAR)) +_QUOTED_PAIR_REPLACE_RE = re.compile(r'\\([\t !-~])') # same pattern as _QUOTED_PAIR but contains a capture group _FORWARDED_PAIR_RE = re.compile(_FORWARDED_PAIR) @@ -183,55 +183,72 @@ def secure(self): @reify def forwarded(self): - """ A frozendict containing parsed Forwarded header(s). + """ A tuple containing all parsed Forwarded header(s). Makes an effort to parse Forwarded headers as specified by RFC 7239: - - It adds all parameters (by, for, host, proto) in the order it finds - them; starting at the topmost / first added 'Forwarded' header, at - the leftmost / first-added parwameter. - - It checks that the value has valid syntax in general as specified in - section 4: either a 'token' or a 'quoted-string'. + - It adds one (immutable) dictionary per Forwarded 'field-value', ie + per proxy. The element corresponds to the data in the Forwarded + field-value added by the first proxy encountered by the client. Each + subsequent item corresponds to those added by later proxies. + - It checks that every value has valid syntax in general as specified + in section 4: either a 'token' or a 'quoted-string'. - It un-escapes found escape sequences. - It does NOT validate 'by' and 'for' contents as specified in section 6. - It does NOT validate 'host' contents (Host ABNF). - It does NOT validate 'proto' contents for valid URI scheme names. - Returns a dict(by=tuple(...), for=tuple(...), host=tuple(...), - proto=tuple(...), ) + Returns a tuple containing one or more immutable dicts """ - params = MultiDict({'by': [], 'for': [], 'host': [], 'proto': []}) - for forwarded_elm in self._message.headers.getall(hdrs.FORWARDED, ()): - forwarded_pairs = (_FORWARDED_PAIR_RE.findall(pair) - for pair in forwarded_elm.split(';')) - for forwarded_pair in forwarded_pairs: - if len(forwarded_pair) != 1: - # non-compliant syntax, ignore - continue - param, value = forwarded_pair[0] - if value and value[0] == '"': - # quoted string: replace quotes and escape - # sequences - value = _QUOTED_PAIR_REPLACE_RE.sub( - r'\1', value[1:-1]) - params[param.lower()].append(value) - return MultiDictProxy(params) + def _parse_forwarded(forwarded_headers): + for field_value in forwarded_headers: + # by=...;for=..., For=..., BY=... + for forwarded_elm in field_value.split(','): + # by=...;for=... + fvparams = dict() + forwarded_pairs = ( + _FORWARDED_PAIR_RE.findall(pair) + for pair in forwarded_elm.strip().split(';')) + for forwarded_pair in forwarded_pairs: + # by=... + if len(forwarded_pair) != 1: + # non-compliant syntax + break + param, value = forwarded_pair[0] + if param.lower() in fvparams: + # duplicate param in field-value + break + if value and value[0] == '"': + # quoted string: replace quotes and escape + # sequences + value = _QUOTED_PAIR_REPLACE_RE.sub( + r'\1', value[1:-1]) + fvparams[param.lower()] = value + else: + yield types.MappingProxyType(fvparams) + continue + yield dict() + + return tuple( + _parse_forwarded(self._message.headers.getall(hdrs.FORWARDED, ()))) @reify def _scheme(self): - proto = 'http' + proto = None if self._transport.get_extra_info('sslcontext'): proto = 'https' elif self._secure_proxy_ssl_header is not None: header, value = self._secure_proxy_ssl_header if self.headers.get(header) == value: proto = 'https' - elif self.forwarded['proto']: - proto = self.forwarded['proto'][0] - elif hdrs.X_FORWARDED_PROTO in self._message.headers: - proto = self._message.headers[hdrs.X_FORWARDED_PROTO] - return proto + else: + proto = next( + (f['proto'] for f in self.forwarded if 'proto' in f), None + ) + if not proto and hdrs.X_FORWARDED_PROTO in self._message.headers: + proto = self._message.headers[hdrs.X_FORWARDED_PROTO] + return proto or 'http' @property def method(self): @@ -261,10 +278,10 @@ def host(self): Returns str, or None if no hostname is found in the headers. """ - host = None - if self.forwarded['host']: - host = self.forwarded['host'][0] - elif hdrs.X_FORWARDED_HOST in self._message.headers: + host = next( + (f['host'] for f in self.forwarded if 'host' in f), None + ) + if not host and hdrs.X_FORWARDED_HOST in self._message.headers: host = self._message.headers[hdrs.X_FORWARDED_HOST] elif hdrs.HOST in self._message.headers: host = self._message.headers[hdrs.HOST] diff --git a/tests/test_web_request.py b/tests/test_web_request.py index 57a846a65aa..440cfd71e59 100644 --- a/tests/test_web_request.py +++ b/tests/test_web_request.py @@ -249,55 +249,63 @@ def test_https_scheme_by_secure_proxy_ssl_header_false_test(make_request): def test_single_forwarded_header(make_request): - header = 'by=identifier; for=identifier; host=identifier; proto=identifier' + header = 'by=identifier;for=identifier;host=identifier;proto=identifier' req = make_request('GET', '/', headers=CIMultiDict({'Forwarded': header})) - assert req.forwarded['by'] == ['identifier'] - assert req.forwarded['for'] == ['identifier'] - assert req.forwarded['host'] == ['identifier'] - assert req.forwarded['proto'] == ['identifier'] + assert req.forwarded[0]['by'] == 'identifier' + assert req.forwarded[0]['for'] == 'identifier' + assert req.forwarded[0]['host'] == 'identifier' + assert req.forwarded[0]['proto'] == 'identifier' def test_single_forwarded_header_camelcase(make_request): - header = 'bY=identifier; fOr=identifier; HOst=identifier; pRoTO=identifier' + header = 'bY=identifier;fOr=identifier;HOst=identifier;pRoTO=identifier' req = make_request('GET', '/', headers=CIMultiDict({'Forwarded': header})) - assert req.forwarded['by'] == ['identifier'] - assert req.forwarded['for'] == ['identifier'] - assert req.forwarded['host'] == ['identifier'] - assert req.forwarded['proto'] == ['identifier'] + assert req.forwarded[0]['by'] == 'identifier' + assert req.forwarded[0]['for'] == 'identifier' + assert req.forwarded[0]['host'] == 'identifier' + assert req.forwarded[0]['proto'] == 'identifier' def test_single_forwarded_header_single_param(make_request): header = 'BY=identifier' req = make_request('GET', '/', headers=CIMultiDict({'Forwarded': header})) - assert req.forwarded['by'] == ['identifier'] + assert req.forwarded[0]['by'] == 'identifier' def test_single_forwarded_header_multiple_param(make_request): - header = 'By=identifier1;BY=identifier2; By=identifier3; BY=identifier4' + header = 'By=identifier1,BY=identifier2, By=identifier3 , BY=identifier4' req = make_request('GET', '/', headers=CIMultiDict({'Forwarded': header})) - assert req.forwarded['by'] == ['identifier1', 'identifier2', 'identifier3', - 'identifier4'] + assert len(req.forwarded) == 4 + assert req.forwarded[0]['by'] == 'identifier1' + assert req.forwarded[1]['by'] == 'identifier2' + assert req.forwarded[2]['by'] == 'identifier3' + assert req.forwarded[3]['by'] == 'identifier4' def test_single_forwarded_header_quoted_escaped(make_request): - header = 'Proto=identifier; pROTO="\lala lan\d\~ 123\!&"' + header = 'BY=identifier;pROTO="\lala lan\d\~ 123\!&"' req = make_request('GET', '/', headers=CIMultiDict({'Forwarded': header})) - assert req.forwarded['proto'] == ['identifier', 'lala land~ 123!&'] + assert req.forwarded[0]['by'] == 'identifier' + assert req.forwarded[0]['proto'] == 'lala land~ 123!&' def test_multiple_forwarded_headers(make_request): headers = CIMultiDict() - headers.add('Forwarded', 'By=identifier1;BY=identifier2') - headers.add('Forwarded', 'By=identifier3; BY=identifier4') + headers.add('Forwarded', 'By=identifier1;for=identifier2, BY=identifier3') + headers.add('Forwarded', 'By=identifier4;fOr=identifier5') req = make_request('GET', '/', headers=headers) - assert req.forwarded['by'] == ['identifier1', 'identifier2', 'identifier3', - 'identifier4'] + assert len(req.forwarded) == 3 + assert req.forwarded[0]['by'] == 'identifier1' + assert req.forwarded[0]['for'] == 'identifier2' + assert req.forwarded[1]['by'] == 'identifier3' + assert req.forwarded[2]['by'] == 'identifier4' + assert req.forwarded[2]['for'] == 'identifier5' def test_https_scheme_by_forwarded_header(make_request): req = make_request('GET', '/', headers=CIMultiDict( - {'Forwarded': 'by=; for=; host=; proto=https'})) + {'Forwarded': 'by=;for=;host=;proto=https'})) assert "https" == req.scheme assert req.secure is True @@ -324,10 +332,10 @@ def test_https_scheme_by_x_forwarded_proto_header_no_tls(make_request): def test_host_by_forwarded_header(make_request): - req = make_request('GET', '/', - headers=CIMultiDict( - {'Forwarded': 'by=; for=; host' - '=example.com; proto=https'})) + headers = CIMultiDict() + headers.add('Forwarded', 'By=identifier1;for=identifier2, BY=identifier3') + headers.add('Forwarded', 'by=;for=;host=example.com') + req = make_request('GET', '/', headers=headers) assert req.host == 'example.com' From 561a14fdb499574f4bb89ff5e023db396a810672 Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Mon, 22 May 2017 23:24:52 +0200 Subject: [PATCH 22/25] docs for forwarded --- docs/web_reference.rst | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/docs/web_reference.rst b/docs/web_reference.rst index 21988c6adb4..fbc18bd3cf1 100644 --- a/docs/web_reference.rst +++ b/docs/web_reference.rst @@ -89,11 +89,42 @@ and :ref:`aiohttp-web-signals` handlers. .. seealso:: :attr:`scheme` + .. attribute:: forwarded + + A tuple containing all parsed Forwarded header(s). + + Makes an effort to parse Forwarded headers as specified by :rfc:`7239`: + + - It adds one (immutable) dictionary per Forwarded ``field-value``, ie + per proxy. The element corresponds to the data in the Forwarded + ``field-value`` added by the first proxy encountered by the client. + Each subsequent item corresponds to those added by later proxies. + - It checks that every value has valid syntax in general as specified + in :rfc:`7239#section-4`: either a ``token`` or a ``quoted-string``. + - It un-escapes ``quoted-pairs``. + - It does NOT validate 'by' and 'for' contents as specified in + :rfc:`7239#section-6`. + - It does NOT validate 'host' contents (Host ABNF). + - It does NOT validate 'proto' contents for valid URI scheme names. + + Returns a tuple containing one or more immutable dicts + + .. seealso:: :attr:`scheme` + + .. seealso:: :attr:`host` + .. attribute:: host - *HOST* header of request, Read-only property. + Hostname of the request. + + Hostname is resolved through the following headers, in this order: + + - *Forwarded* + - *X-Forwarded-Host* + - *Host* - Returns :class:`str` or ``None`` if HTTP request has no *HOST* header. + Returns :class:`str`, or ``None`` if no hostname is found in the + headers. .. attribute:: path_qs From 6b651ec1e96845d2570c993d8913c981f4dc5829 Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Mon, 22 May 2017 23:25:19 +0200 Subject: [PATCH 23/25] removed double entry from changes --- CHANGES.rst | 3 --- 1 file changed, 3 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 9373390f1f1..086b6546e52 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -44,9 +44,6 @@ Changes - Do not unquote `+` in match_info values #1816 -- Use Forwarded, X-Forwarded-Scheme and X-Forwarded-Host for better scheme and - host resolution. #1134 - - Fix sub-application middlewares resolution order #1853 - Fix applications comparison #1866 From 448e658106ea828d40db90065e19cf8d8d9024c9 Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Mon, 22 May 2017 23:27:43 +0200 Subject: [PATCH 24/25] Docs for forwarded --- docs/web_reference.rst | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/docs/web_reference.rst b/docs/web_reference.rst index 21988c6adb4..fbc18bd3cf1 100644 --- a/docs/web_reference.rst +++ b/docs/web_reference.rst @@ -89,11 +89,42 @@ and :ref:`aiohttp-web-signals` handlers. .. seealso:: :attr:`scheme` + .. attribute:: forwarded + + A tuple containing all parsed Forwarded header(s). + + Makes an effort to parse Forwarded headers as specified by :rfc:`7239`: + + - It adds one (immutable) dictionary per Forwarded ``field-value``, ie + per proxy. The element corresponds to the data in the Forwarded + ``field-value`` added by the first proxy encountered by the client. + Each subsequent item corresponds to those added by later proxies. + - It checks that every value has valid syntax in general as specified + in :rfc:`7239#section-4`: either a ``token`` or a ``quoted-string``. + - It un-escapes ``quoted-pairs``. + - It does NOT validate 'by' and 'for' contents as specified in + :rfc:`7239#section-6`. + - It does NOT validate 'host' contents (Host ABNF). + - It does NOT validate 'proto' contents for valid URI scheme names. + + Returns a tuple containing one or more immutable dicts + + .. seealso:: :attr:`scheme` + + .. seealso:: :attr:`host` + .. attribute:: host - *HOST* header of request, Read-only property. + Hostname of the request. + + Hostname is resolved through the following headers, in this order: + + - *Forwarded* + - *X-Forwarded-Host* + - *Host* - Returns :class:`str` or ``None`` if HTTP request has no *HOST* header. + Returns :class:`str`, or ``None`` if no hostname is found in the + headers. .. attribute:: path_qs From 36e50c42996ae2df680b2040d3997f70b4232e04 Mon Sep 17 00:00:00 2001 From: Evert Lammerts Date: Tue, 23 May 2017 20:14:44 +0200 Subject: [PATCH 25/25] docs pass spellchecker --- docs/web_reference.rst | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/web_reference.rst b/docs/web_reference.rst index fbc18bd3cf1..90518e07c8d 100644 --- a/docs/web_reference.rst +++ b/docs/web_reference.rst @@ -95,7 +95,7 @@ and :ref:`aiohttp-web-signals` handlers. Makes an effort to parse Forwarded headers as specified by :rfc:`7239`: - - It adds one (immutable) dictionary per Forwarded ``field-value``, ie + - It adds one (immutable) dictionary per Forwarded ``field-value``, i.e. per proxy. The element corresponds to the data in the Forwarded ``field-value`` added by the first proxy encountered by the client. Each subsequent item corresponds to those added by later proxies. @@ -104,10 +104,10 @@ and :ref:`aiohttp-web-signals` handlers. - It un-escapes ``quoted-pairs``. - It does NOT validate 'by' and 'for' contents as specified in :rfc:`7239#section-6`. - - It does NOT validate 'host' contents (Host ABNF). - - It does NOT validate 'proto' contents for valid URI scheme names. + - It does NOT validate ``host`` contents (Host ABNF). + - It does NOT validate ``proto`` contents for valid URI scheme names. - Returns a tuple containing one or more immutable dicts + Returns a tuple containing one or more ``MappingProxy`` objects .. seealso:: :attr:`scheme` @@ -115,15 +115,15 @@ and :ref:`aiohttp-web-signals` handlers. .. attribute:: host - Hostname of the request. + Host name of the request. - Hostname is resolved through the following headers, in this order: + Host name is resolved through the following headers, in this order: - *Forwarded* - *X-Forwarded-Host* - *Host* - Returns :class:`str`, or ``None`` if no hostname is found in the + Returns :class:`str`, or ``None`` if no host name is found in the headers. .. attribute:: path_qs