Skip to content

Commit 60b3563

Browse files
authored
remove logic to delete a file on task completion (#5855)
1 parent 7b5fc1a commit 60b3563

File tree

2 files changed

+31
-65
lines changed

2 files changed

+31
-65
lines changed

datahub/company/tasks/contact.py

+16-20
Original file line numberDiff line numberDiff line change
@@ -173,15 +173,18 @@ def _list_objects(self, client, bucket_name, prefix):
173173
Bucket=bucket_name,
174174
Prefix=prefix,
175175
)
176-
# Get the list of files, oldest first. Process in that order, so any changes in newer
177-
# files take precedence
176+
# Get the list of files, ordered by LastModified descending.
178177
sorted_files = sorted(
179178
[object for object in response.get('Contents', {})],
180179
key=lambda x: x['LastModified'],
181-
reverse=False,
180+
reverse=True,
182181
)
183182
return [file['Key'] for file in sorted_files]
184183

184+
def _get_most_recent_object(self, client, bucket_name, prefix):
185+
files_in_bucket = self._list_objects(client, bucket_name, prefix)
186+
return files_in_bucket[0] if len(files_in_bucket) > 0 else None
187+
185188
def _log_at_interval(self, index: int, message: str):
186189
"""
187190
Log in a way that is suitable for both small and large datasets. Initially
@@ -194,25 +197,23 @@ def _log_at_interval(self, index: int, message: str):
194197
def ingest(self):
195198
logger.info('Checking for new contact consent data files')
196199
s3_client = get_s3_client(REGION)
197-
file_keys = self._list_objects(s3_client, BUCKET, CONSENT_PREFIX)
198-
if len(file_keys) == 0:
200+
file_key = self._get_most_recent_object(s3_client, BUCKET, CONSENT_PREFIX)
201+
if not file_key:
199202
logger.info(
200203
'No contact consent files found in bucket %s matching prefix %s',
201204
BUCKET,
202205
CONSENT_PREFIX,
203206
)
204207
return
205208

206-
for file_key in file_keys:
207-
try:
208-
self.sync_file_with_database(s3_client, file_key)
209-
self.delete_file(s3_client, file_key)
210-
except Exception as exc:
211-
logger.exception(
212-
f'Error ingesting contact consent file {file_key}',
213-
stack_info=True,
214-
)
215-
raise exc
209+
try:
210+
self.sync_file_with_database(s3_client, file_key)
211+
except Exception as exc:
212+
logger.exception(
213+
f'Error ingesting contact consent file {file_key}',
214+
stack_info=True,
215+
)
216+
raise exc
216217

217218
def get_grouped_contacts(self) -> dict[str, List[Contact]]:
218219
contacts_qs = Contact.objects.all()
@@ -319,8 +320,3 @@ def sync_file_with_database(self, client, file_key):
319320
i,
320321
path,
321322
)
322-
323-
def delete_file(self, client, file_key):
324-
logger.info('Deleting contact consent file %s', file_key)
325-
client.delete_object(Bucket=BUCKET, Key=file_key)
326-
logger.info('Successfully deleted contact consent file %s', file_key)

datahub/company/test/tasks/test_contact_task.py

+15-45
Original file line numberDiff line numberDiff line change
@@ -428,8 +428,17 @@ def test_files():
428428
@mock_aws
429429
def setup_s3_bucket(bucket_name, test_files):
430430
mock_s3_client = _create_bucket(bucket_name)
431+
432+
last_modfied = datetime.datetime.now()
431433
for file in test_files:
432-
mock_s3_client.put_object(Bucket=bucket_name, Key=file, Body=json.dumps('Test contents'))
434+
# Freeze time at a different instant for each upload so every file gets a distinct LastModified date
435+
with freeze_time(last_modfied):
436+
mock_s3_client.put_object(
437+
Bucket=bucket_name,
438+
Key=file,
439+
Body=json.dumps('Test contents'),
440+
)
441+
last_modfied = last_modfied + datetime.timedelta(seconds=3)
433442

434443

435444
def _create_bucket(bucket_name):
@@ -503,7 +512,7 @@ def test_ingest_with_exception_logs_error_and_reraises_original_exception(self,
503512
task.ingest()
504513

505514
@mock_aws
506-
def test_ingest_with_empty_s3_bucket_does_not_call_sync_or_delete(self):
515+
def test_ingest_with_empty_s3_bucket_does_not_call_sync(self):
507516
"""
508517
Test that the task can handle an empty S3 bucket
509518
"""
@@ -512,15 +521,13 @@ def test_ingest_with_empty_s3_bucket_does_not_call_sync_or_delete(self):
512521
with mock.patch.multiple(
513522
task,
514523
sync_file_with_database=mock.DEFAULT,
515-
delete_file=mock.DEFAULT,
516524
):
517525
task.ingest()
518526
task.sync_file_with_database.assert_not_called()
519-
task.delete_file.assert_not_called()
520527

521528
@mock_aws
522529
@override_settings(S3_LOCAL_ENDPOINT_URL=None)
523-
def test_ingest_calls_sync_with_correct_files_order(self, test_files):
530+
def test_ingest_calls_sync_with_newest_file_order(self, test_files):
524531
"""
525532
Test that the ingest calls the sync exactly once, with the most recently modified file
526533
"""
@@ -529,32 +536,11 @@ def test_ingest_calls_sync_with_correct_files_order(self, test_files):
529536
with mock.patch.multiple(
530537
task,
531538
sync_file_with_database=mock.DEFAULT,
532-
delete_file=mock.DEFAULT,
533539
):
534540
task.ingest()
535-
task.sync_file_with_database.assert_has_calls(
536-
[mock.call(mock.ANY, file) for file in test_files],
537-
)
538-
539-
@mock_aws
540-
@override_settings(S3_LOCAL_ENDPOINT_URL=None)
541-
def test_ingest_calls_delete_for_all_files(
542-
self,
543-
test_files,
544-
):
545-
"""
546-
Test that the ingest calls delete with the files in correct order
547-
"""
548-
setup_s3_bucket(BUCKET, test_files)
549-
task = ContactConsentIngestionTask()
550-
with mock.patch.multiple(
551-
task,
552-
sync_file_with_database=mock.DEFAULT,
553-
delete_file=mock.DEFAULT,
554-
):
555-
task.ingest()
556-
task.delete_file.assert_has_calls(
557-
[mock.call(mock.ANY, file) for file in test_files],
541+
task.sync_file_with_database.assert_called_once_with(
542+
mock.ANY,
543+
test_files[-1],
558544
)
559545

560546
@mock_aws
@@ -884,19 +870,3 @@ def test_should_update_contact_with_row_date_newer_than_contact_date_should_retu
884870
contact,
885871
row,
886872
)
887-
888-
@mock_aws
889-
def test_delete_file_removes_file_using_boto3(self):
890-
"""
891-
Test that the file is deleted from the bucket
892-
"""
893-
filename = f'{CONSENT_PREFIX}file_{uuid.uuid4()}.jsonl'
894-
upload_file_to_s3(BUCKET, filename, 'test')
895-
client = boto3.client('s3', REGION)
896-
897-
ContactConsentIngestionTask().delete_file(client, filename)
898-
with pytest.raises(client.exceptions.NoSuchKey):
899-
client.get_object(
900-
Bucket=BUCKET,
901-
Key=filename,
902-
)

0 commit comments

Comments
 (0)