Skip to content

Commit

Permalink
Fix seeking past end of file for s3 backend
Browse files Browse the repository at this point in the history
  • Loading branch information
jcushman committed Oct 15, 2020
1 parent c15c5d2 commit 26ab767
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 12 deletions.
15 changes: 3 additions & 12 deletions smart_open/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
_SLEEP_SECONDS = 10

# Returned by AWS when we try to seek beyond EOF.
_OUT_OF_RANGE = 'Requested Range Not Satisfiable'
_OUT_OF_RANGE = 'InvalidRange'


def parse_uri(uri_as_string):
Expand Down Expand Up @@ -385,18 +385,9 @@ def _open_body(self, start=None, stop=None):
except IOError as ioe:
# Handle requested content range exceeding content size.
error_response = _unwrap_ioerror(ioe)
if error_response is None or error_response.get('Message') != _OUT_OF_RANGE:
if error_response is None or error_response.get('Code') != _OUT_OF_RANGE:
raise
try:
self._position = self._content_length = int(error_response['ActualObjectSize'])
except KeyError:
# This shouldn't happen with real S3, but moto lacks ActualObjectSize.
# Reported at https://github.com/spulec/moto/issues/2981
self._position = self._content_length = _get(
self._object,
version=self._version_id,
**self._object_kwargs,
)['ContentLength']
self._position = self._content_length = int(error_response['ActualObjectSize'])
self._body = io.BytesIO()
else:
units, start, stop, length = smart_open.utils.parse_content_range(response['ContentRange'])
Expand Down
38 changes: 38 additions & 0 deletions smart_open/tests/test_s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,26 @@ def mock_make_request(self, operation_model, *args, **kwargs):
finally:
patcher.stop()

@contextmanager
def patch_invalid_range_response(self, actual_size):
    """Temporarily wrap ``smart_open.s3._get`` to rewrite moto's invalid-range error.

    Works around https://github.com/spulec/moto/issues/2981, where the error
    response for a GetObject request with an invalid byte range does not
    match the expected API response.  While the context is active, any
    IOError whose unwrapped error response carries the message
    'Requested Range Not Satisfiable' is amended in place before being
    re-raised.

    :param actual_size: value stored under the 'ActualObjectSize' key of the
        amended error response.
    """
    original_get = smart_open.s3._get

    def patched_get(*args, **kwargs):
        try:
            return original_get(*args, **kwargs)
        except IOError as err:
            details = smart_open.s3._unwrap_ioerror(err)
            if details:
                if details.get('Message') == 'Requested Range Not Satisfiable':
                    details['Code'] = 'InvalidRange'
                    details['Message'] = 'The requested range is not satisfiable'
                    details['ActualObjectSize'] = actual_size
            # Whether or not the response was amended, the error still propagates.
            raise

    with patch('smart_open.s3._get', new=patched_get):
        yield


@unittest.skipUnless(
ENABLE_MOTO_SERVER,
Expand Down Expand Up @@ -236,6 +256,15 @@ def test_seek_end(self):
self.assertEqual(seek, len(content) - 4)
self.assertEqual(fin.read(), b'you?')

def test_seek_past_end(self):
    """Seeking to an offset beyond the object's end should report the object's length."""
    payload = u"hello wořld\nhow are you?".encode('utf8')
    put_to_bucket(contents=payload)
    expected_size = len(payload)

    with self.assertApiCalls(GetObject=1):
        # The error patch needs the size as a string, hence the str() conversion.
        with self.patch_invalid_range_response(str(expected_size)):
            reader = smart_open.s3.SeekableBufferedInputBase(
                BUCKET_NAME,
                KEY_NAME,
                defer_seek=True,
            )
            new_position = reader.seek(60)
            self.assertEqual(new_position, expected_size)

def test_detect_eof(self):
content = u"hello wořld\nhow are you?".encode('utf8')
put_to_bucket(contents=content)
Expand Down Expand Up @@ -352,6 +381,15 @@ def test_defer_seek(self):
fin.seek(10)
self.assertEqual(fin.read(), content[10:])

def test_read_empty_file(self):
    """Reading a zero-byte object should return an empty bytes string."""
    put_to_bucket(contents=b'')

    with self.assertApiCalls(GetObject=1):
        # '0' is the object size injected into the patched invalid-range error.
        with self.patch_invalid_range_response('0'):
            reader = smart_open.s3.SeekableBufferedInputBase(BUCKET_NAME, KEY_NAME)
            with reader as fin:
                result = fin.read()

    self.assertEqual(result, b'')


@moto.mock_s3
class MultipartWriterTest(unittest.TestCase):
Expand Down

0 comments on commit 26ab767

Please sign in to comment.