Skip to content

Commit 98f6d36

Browse files
authored
Use AF_UNSPEC for name resolution (#389)
* Use AF_UNSPEC for name resolution This reverts most of 1ad97fb, but keeps tests which still have general applicability. The reason the original change was made was to try and work around a bug[1] in the eventlet library. Eventlet monkey-patches the socket.getaddrinfo function and replaces it with its own async, eventlet-aware implementation. The reason name resolution was broken in the first place is because eventlet was consulting DNS first, and then if that failed, falling back to /etc/hosts, which is just flat out incorrect behavior. It's important to note that this was *only* when running py-amqp under eventlet, and *only* for specific versions of eventlet that have long been fixed. So this workaround is not even needed anymore. With "normal" (non-eventlet) use, socket.getaddrinfo instead calls into the glibc getaddrinfo implementation, which ultimately uses libnss to resolve hostnames. However, there is an issue with the original workaround when using the default (glibc) getaddrinfo. The workaround (current) implementation explicitly forces resolution to use AF_INET (IPv4) and then only if that does not succeed, it in turn will try with AF_INET6 (IPv6). This generally works well for IPv4-only hosts, but can be unnecessarily slow for dual-stack IPv4/IPv6 hosts. Consider the following: - We want to connect to example.org - The /etc/hosts file contains an IPv6 entry: example.org f00d::1 - The /etc/nsswitch.conf file contains typical (simplified) hosts config: hosts: files dns In this case, the current code will involve nss iterating through the modules: - files (with AF_INET): fails, because there is no IPv4 address in /etc/hosts - dns (with AF_INET): may or may not succeed per-site, depending on how DNS is configured. If DNS is slow/misconfigured, this may incur a delay and block for a significant amount of time. - files (with AF_INET6): succeeds, and getaddrinfo returns f00d::1. Now in the same scenario as before, with this fix which reverts back to using AF_UNSPEC instead: - files (with AF_UNSPEC) succeeds, and getaddrinfo returns f00d::1. There is no need to involve DNS at all. Even a well-configured, quick-to-respond DNS server is going to be many orders of magnitude slower than consulting with /etc/hosts which libnss keeps cached in memory. [1] https://bugs.launchpad.net/neutron/+bug/1696094/comments/22 * tests: ensure getaddrinfo is called with AF_UNSPEC
1 parent 4e69059 commit 98f6d36

File tree

2 files changed

+22
-88
lines changed

2 files changed

+22
-88
lines changed

amqp/transport.py

+19-51
Original file line numberDiff line numberDiff line change
@@ -169,59 +169,27 @@ def having_timeout(self, timeout):
169169
sock.settimeout(prev)
170170

171171
def _connect(self, host, port, timeout):
172-
e = None
173-
174-
# Below we are trying to avoid additional DNS requests for AAAA if A
175-
# succeeds. This helps a lot in case when a hostname has an IPv4 entry
176-
# in /etc/hosts but not IPv6. Without the (arguably somewhat twisted)
177-
# logic below, getaddrinfo would attempt to resolve the hostname for
178-
# both IP versions, which would make the resolver talk to configured
179-
# DNS servers. If those servers are for some reason not available
180-
# during resolution attempt (either because of system misconfiguration,
181-
# or network connectivity problem), resolution process locks the
182-
# _connect call for extended time.
183-
addr_types = (socket.AF_INET, socket.AF_INET6)
184-
addr_types_num = len(addr_types)
185-
for n, family in enumerate(addr_types):
186-
# first, resolve the address for a single address family
172+
entries = socket.getaddrinfo(
173+
host, port, socket.AF_UNSPEC, socket.SOCK_STREAM, SOL_TCP,
174+
)
175+
for i, res in enumerate(entries):
176+
af, socktype, proto, canonname, sa = res
187177
try:
188-
entries = socket.getaddrinfo(
189-
host, port, family, socket.SOCK_STREAM, SOL_TCP)
190-
entries_num = len(entries)
191-
except socket.gaierror:
192-
# we may have depleted all our options
193-
if n + 1 >= addr_types_num:
194-
# if getaddrinfo succeeded before for another address
195-
# family, reraise the previous socket.error since it's more
196-
# relevant to users
197-
raise (e
198-
if e is not None
199-
else socket.error(
200-
"failed to resolve broker hostname"))
201-
continue # pragma: no cover
202-
203-
# now that we have address(es) for the hostname, connect to broker
204-
for i, res in enumerate(entries):
205-
af, socktype, proto, _, sa = res
178+
self.sock = socket.socket(af, socktype, proto)
206179
try:
207-
self.sock = socket.socket(af, socktype, proto)
208-
try:
209-
set_cloexec(self.sock, True)
210-
except NotImplementedError:
211-
pass
212-
self.sock.settimeout(timeout)
213-
self.sock.connect(sa)
214-
except OSError as ex:
215-
e = ex
216-
if self.sock is not None:
217-
self.sock.close()
218-
self.sock = None
219-
# we may have depleted all our options
220-
if i + 1 >= entries_num and n + 1 >= addr_types_num:
221-
raise
222-
else:
223-
# hurray, we established connection
224-
return
180+
set_cloexec(self.sock, True)
181+
except NotImplementedError:
182+
pass
183+
self.sock.settimeout(timeout)
184+
self.sock.connect(sa)
185+
except socket.error:
186+
if self.sock:
187+
self.sock.close()
188+
self.sock = None
189+
if i + 1 >= len(entries):
190+
raise
191+
else:
192+
break
225193

226194
def _init_socket(self, socket_settings, read_timeout, write_timeout):
227195
self.sock.settimeout(None) # set socket back to blocking mode

t/unit/test_transport.py

+3-37
Original file line numberDiff line numberDiff line change
@@ -520,54 +520,20 @@ def test_connect_multiple_addr_entries_succeed(self):
520520
side_effect=(socket.error, None)):
521521
self.t.connect()
522522

523-
def test_connect_short_curcuit_on_INET_succeed(self):
523+
def test_connect_calls_getaddrinfo_with_af_unspec(self):
524524
with patch('socket.socket', return_value=MockSocket()), \
525-
patch('socket.getaddrinfo',
526-
side_effect=[
527-
[(socket.AF_INET, 1, socket.IPPROTO_TCP,
528-
'', ('127.0.0.1', 5672))],
529-
[(socket.AF_INET6, 1, socket.IPPROTO_TCP,
530-
'', ('::1', 5672))]
531-
]) as getaddrinfo:
525+
patch('socket.getaddrinfo') as getaddrinfo:
532526
self.t.sock = Mock()
533527
self.t.close()
534528
self.t.connect()
535529
getaddrinfo.assert_called_with(
536-
'localhost', 5672, socket.AF_INET, ANY, ANY)
537-
538-
def test_connect_short_curcuit_on_INET_fails(self):
539-
with patch('socket.socket', return_value=MockSocket()) as sock_mock, \
540-
patch('socket.getaddrinfo',
541-
side_effect=[
542-
[(socket.AF_INET, 1, socket.IPPROTO_TCP,
543-
'', ('127.0.0.1', 5672))],
544-
[(socket.AF_INET6, 1, socket.IPPROTO_TCP,
545-
'', ('::1', 5672))]
546-
]) as getaddrinfo:
547-
self.t.sock = Mock()
548-
self.t.close()
549-
with patch.object(sock_mock.return_value, 'connect',
550-
side_effect=(socket.error, None)):
551-
self.t.connect()
552-
getaddrinfo.assert_has_calls(
553-
[call('localhost', 5672, addr_type, ANY, ANY)
554-
for addr_type in (socket.AF_INET, socket.AF_INET6)])
530+
'localhost', 5672, socket.AF_UNSPEC, ANY, ANY)
555531

556532
def test_connect_getaddrinfo_raises_gaierror(self):
557533
with patch('socket.getaddrinfo', side_effect=socket.gaierror):
558534
with pytest.raises(socket.error):
559535
self.t.connect()
560536

561-
def test_connect_getaddrinfo_raises_gaierror_once_recovers(self):
562-
with patch('socket.socket', return_value=MockSocket()), \
563-
patch('socket.getaddrinfo',
564-
side_effect=[
565-
socket.gaierror,
566-
[(socket.AF_INET6, 1, socket.IPPROTO_TCP,
567-
'', ('::1', 5672))]
568-
]):
569-
self.t.connect()
570-
571537
def test_connect_survives_not_implemented_set_cloexec(self):
572538
with patch('socket.socket', return_value=MockSocket()), \
573539
patch('socket.getaddrinfo',

0 commit comments

Comments
 (0)