From 26ab767faa9942c3964743baac4a8163c77e3a1d Mon Sep 17 00:00:00 2001 From: Jack Cushman Date: Thu, 15 Oct 2020 13:49:42 -0400 Subject: [PATCH] Fix seeking past end of file for s3 backend --- smart_open/s3.py | 15 +++------------ smart_open/tests/test_s3.py | 38 +++++++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 12 deletions(-) diff --git a/smart_open/s3.py b/smart_open/s3.py index c8c28a15..e29366e2 100644 --- a/smart_open/s3.py +++ b/smart_open/s3.py @@ -48,7 +48,7 @@ _SLEEP_SECONDS = 10 # Returned by AWS when we try to seek beyond EOF. -_OUT_OF_RANGE = 'Requested Range Not Satisfiable' +_OUT_OF_RANGE = 'InvalidRange' def parse_uri(uri_as_string): @@ -385,18 +385,9 @@ def _open_body(self, start=None, stop=None): except IOError as ioe: # Handle requested content range exceeding content size. error_response = _unwrap_ioerror(ioe) - if error_response is None or error_response.get('Message') != _OUT_OF_RANGE: + if error_response is None or error_response.get('Code') != _OUT_OF_RANGE: raise - try: - self._position = self._content_length = int(error_response['ActualObjectSize']) - except KeyError: - # This shouldn't happen with real S3, but moto lacks ActualObjectSize. - # Reported at https://github.com/spulec/moto/issues/2981 - self._position = self._content_length = _get( - self._object, - version=self._version_id, - **self._object_kwargs, - )['ContentLength'] + self._position = self._content_length = int(error_response['ActualObjectSize']) self._body = io.BytesIO() else: units, start, stop, length = smart_open.utils.parse_content_range(response['ContentRange']) diff --git a/smart_open/tests/test_s3.py b/smart_open/tests/test_s3.py index 857a83b0..164942ba 100644 --- a/smart_open/tests/test_s3.py +++ b/smart_open/tests/test_s3.py @@ -108,6 +108,26 @@ def mock_make_request(self, operation_model, *args, **kwargs): finally: patcher.stop() + @contextmanager + def patch_invalid_range_response(self, actual_size): + """ Work around a bug in moto (https://github.com/spulec/moto/issues/2981) where the + API response doesn't match when requesting an invalid range of bytes from an S3 GetObject. """ + _real_get = smart_open.s3._get + + def mock_get(*args, **kwargs): + try: + return _real_get(*args, **kwargs) + except IOError as ioe: + error_response = smart_open.s3._unwrap_ioerror(ioe) + if error_response and error_response.get('Message') == 'Requested Range Not Satisfiable': + error_response['ActualObjectSize'] = actual_size + error_response['Code'] = 'InvalidRange' + error_response['Message'] = 'The requested range is not satisfiable' + raise + + with patch('smart_open.s3._get', new=mock_get): + yield + @unittest.skipUnless( ENABLE_MOTO_SERVER, @@ -236,6 +256,15 @@ def test_seek_end(self): self.assertEqual(seek, len(content) - 4) self.assertEqual(fin.read(), b'you?') + def test_seek_past_end(self): + content = u"hello wořld\nhow are you?".encode('utf8') + put_to_bucket(contents=content) + + with self.assertApiCalls(GetObject=1), self.patch_invalid_range_response(str(len(content))): + fin = smart_open.s3.SeekableBufferedInputBase(BUCKET_NAME, KEY_NAME, defer_seek=True) + seek = fin.seek(60) + self.assertEqual(seek, len(content)) + def test_detect_eof(self): content = u"hello wořld\nhow are you?".encode('utf8') put_to_bucket(contents=content) @@ -352,6 +381,15 @@ def test_defer_seek(self): fin.seek(10) self.assertEqual(fin.read(), content[10:]) + def test_read_empty_file(self): + put_to_bucket(contents=b'') + + with self.assertApiCalls(GetObject=1), self.patch_invalid_range_response('0'): + with smart_open.s3.SeekableBufferedInputBase(BUCKET_NAME, KEY_NAME) as fin: + data = fin.read() + + self.assertEqual(data, b'') + @moto.mock_s3 class MultipartWriterTest(unittest.TestCase):