Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feature/improve-consent-job-logging #5782

Merged
merged 1 commit into from
Nov 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 36 additions & 10 deletions datahub/company/tasks/contact.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,18 +192,32 @@ def _log_at_interval(self, index: int, message: str):
logger.info(message)

def ingest(self):
logger.info('Checking for new Contact Consent data files')
logger.info('Checking for new contact consent data files')
s3_client = get_s3_client(REGION)
file_keys = self._list_objects(s3_client, BUCKET, CONSENT_PREFIX)
if len(file_keys) == 0:
logger.info('No files found in bucket %s matching prefix %s', BUCKET, CONSENT_PREFIX)
logger.info(
'No contact consent files found in bucket %s matching prefix %s',
BUCKET,
CONSENT_PREFIX,
)
return
for file_key in file_keys:
self.sync_file_with_database(s3_client, file_key)
self.delete_file(s3_client, file_key)
try:
self.sync_file_with_database(s3_client, file_key)
self.delete_file(s3_client, file_key)
except Exception as exc:
logger.exception(
f'Error ingesting contact consent file {file_key}',
stack_info=True,
)
raise exc

def sync_file_with_database(self, client, file_key):
logger.info('Syncing file %s', file_key)
logger.info(
'Syncing contact consent file %s with datahub contacts',
file_key,
)
path = f's3://{BUCKET}/{file_key}'

with open(
Expand All @@ -221,13 +235,20 @@ def sync_file_with_database(self, client, file_key):

consent_row = json.loads(line)
if 'email' not in consent_row or 'consents' not in consent_row:
logger.info(
'Row %s does not contain required consent data to process, skipping',
i,
)
continue

email = consent_row['email']
matching_contacts = Contact.objects.filter(email=email)

if not matching_contacts.exists():
logger.debug('Email %s has no matching datahub contact', email)
logger.info(
'Email %s in contact consent file has no matching datahub contact',
email,
)
continue

for matching_contact in matching_contacts:
Expand All @@ -242,7 +263,7 @@ def sync_file_with_database(self, client, file_key):
or last_modified is None
):
update_row = True
# to avoid issues with different source system time formats, just compare on
# To avoid issues with different source system time formats, just compare on
# the date portion
elif (
parser.parse(last_modified).date()
Expand All @@ -263,15 +284,20 @@ def sync_file_with_database(self, client, file_key):
)
matching_contact.save()

logger.debug('Updated consents for email %s', email)
logger.info('Updated contact consent data for email %s', email)
else:
logger.info(
'Email %s would have consent data updated, but setting is disabled',
email,
)

logger.info('Finished processing total %s rows from %s', i, path)
logger.info(
'Finished processing total %s rows for contact consent from file %s',
i,
path,
)

def delete_file(self, client, file_key):
logger.info('Deleting file %s', file_key)
logger.info('Deleting contact consent file %s', file_key)
client.delete_object(Bucket=BUCKET, Key=file_key)
logger.info('Successfully deleted contact consent file %s', file_key)
16 changes: 16 additions & 0 deletions datahub/company/test/tasks/test_contact_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -484,6 +484,22 @@ def test_lock(
@pytest.mark.django_db
class TestContactConsentIngestionTask:

@mock_aws
@override_settings(S3_LOCAL_ENDPOINT_URL=None)
def test_ingest_with_exception_logs_error_and_reraises_original_exception(self, test_files):
"""
Test that the task can catch and log any unhandled exceptions
"""
setup_s3_bucket(BUCKET, test_files)

with mock.patch.object(
ContactConsentIngestionTask,
'sync_file_with_database',
side_effect=AttributeError('Original error message'),
), pytest.raises(AttributeError, match='Original error message'):
task = ContactConsentIngestionTask()
task.ingest()

@mock_aws
def test_ingest_with_empty_s3_bucket_does_not_call_sync_or_delete(self):
"""
Expand Down