Skip to content

Commit 4c93e3c

Browse files
Merge pull request #5957 from uktrade/bugfix/TET-981-stova-event-ingestion
Bugfix/tet 981 stova event ingestion
2 parents 170b1a3 + f3210c2 commit 4c93e3c

File tree

4 files changed

+200
-47
lines changed

4 files changed

+200
-47
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# Generated by Django 4.2.17 on 2025-02-18 07:55
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
    """
    Alter several text fields on the ``stovaevent`` model.

    ``description``, ``location_address2``, ``location_address3``,
    ``location_postcode`` and ``location_state`` now allow blank values;
    ``location_name`` and ``url`` allow both blank and NULL values.
    """

    dependencies = [
        ('company_activity', '0027_stova_event_allow_empty_fields'),
    ]

    operations = [
        # Blank allowed.
        migrations.AlterField(
            model_name='stovaevent',
            name='description',
            field=models.TextField(blank=True),
        ),
        migrations.AlterField(
            model_name='stovaevent',
            name='location_address2',
            field=models.CharField(blank=True, max_length=255),
        ),
        migrations.AlterField(
            model_name='stovaevent',
            name='location_address3',
            field=models.CharField(blank=True, max_length=255),
        ),
        # Blank and NULL allowed.
        migrations.AlterField(
            model_name='stovaevent',
            name='location_name',
            field=models.CharField(blank=True, max_length=255, null=True),
        ),
        # Blank allowed.
        migrations.AlterField(
            model_name='stovaevent',
            name='location_postcode',
            field=models.CharField(blank=True, max_length=255),
        ),
        migrations.AlterField(
            model_name='stovaevent',
            name='location_state',
            field=models.CharField(blank=True, max_length=255),
        ),
        # Blank and NULL allowed.
        migrations.AlterField(
            model_name='stovaevent',
            name='url',
            field=models.TextField(blank=True, null=True),
        ),
    ]

datahub/company_activity/models/stova_event.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ class StovaEvent(models.Model):
2020

2121
stova_event_id = models.IntegerField(unique=True)
2222
name = models.TextField()
23-
description = models.TextField()
23+
description = models.TextField(blank=True)
2424
code = models.CharField(max_length=MAX_LENGTH, blank=True, default='')
2525

2626
created_by = models.CharField(max_length=MAX_LENGTH, blank=True, default='')
@@ -33,7 +33,7 @@ class StovaEvent(models.Model):
3333
city = models.CharField(max_length=MAX_LENGTH)
3434
state = models.CharField(max_length=MAX_LENGTH)
3535
timezone = models.CharField(max_length=MAX_LENGTH, blank=True, null=True, default='')
36-
url = models.TextField()
36+
url = models.TextField(blank=True, null=True)
3737
max_reg = models.IntegerField(null=True, blank=True)
3838

3939
created_date = models.DateTimeField()
@@ -43,14 +43,14 @@ class StovaEvent(models.Model):
4343
close_date = models.DateTimeField(null=True, blank=True)
4444
end_date = models.DateTimeField(null=True, blank=True)
4545

46-
location_state = models.CharField(max_length=MAX_LENGTH)
46+
location_state = models.CharField(max_length=MAX_LENGTH, blank=True)
4747
location_country = models.CharField(max_length=MAX_LENGTH)
4848
location_address1 = models.CharField(max_length=MAX_LENGTH)
49-
location_address2 = models.CharField(max_length=MAX_LENGTH)
50-
location_address3 = models.CharField(max_length=MAX_LENGTH)
49+
location_address2 = models.CharField(max_length=MAX_LENGTH, blank=True)
50+
location_address3 = models.CharField(max_length=MAX_LENGTH, blank=True)
5151
location_city = models.CharField(max_length=MAX_LENGTH)
52-
location_name = models.CharField(max_length=MAX_LENGTH)
53-
location_postcode = models.CharField(max_length=MAX_LENGTH)
52+
location_name = models.CharField(max_length=MAX_LENGTH, null=True, blank=True)
53+
location_postcode = models.CharField(max_length=MAX_LENGTH, blank=True)
5454

5555
approval_required = models.BooleanField()
5656
price_type = models.CharField(max_length=MAX_LENGTH)

datahub/company_activity/tasks/ingest_stova_events.py

+83-33
Original file line numberDiff line numberDiff line change
@@ -51,43 +51,93 @@ def _should_process_record(self, record: dict) -> bool:
5151

5252
return True
5353

54+
@staticmethod
55+
def _required_fields() -> list:
56+
"""
57+
Returns a list of fields required for to make a StovaEvent a Data Hub Event.
58+
Any fields listed here but not provided by Stova will be rejected from ingestion.
59+
60+
:return: Required fields to save a StovaEvent.
61+
"""
62+
return [
63+
'id',
64+
'name',
65+
'location_address1',
66+
'location_city',
67+
]
68+
69+
@staticmethod
70+
def _convert_fields_from_null_to_blank(values: dict) -> dict:
71+
"""
72+
Coverts values from the stova record which could be null into empty strings for saving
73+
as a Data Hub event.
74+
75+
:param values: A single Stova Event record from an S3 bucket.
76+
:return: A single Stova Event record with null/None values replaced with empty strings.
77+
"""
78+
fields_required_as_blank = [
79+
'location_address2',
80+
'location_address3',
81+
'location_state',
82+
'location_postcode',
83+
'description',
84+
]
85+
86+
for field in fields_required_as_blank:
87+
if values[field] is None:
88+
values[field] = ''
89+
90+
return values
91+
5492
def _process_record(self, record: dict) -> None:
5593
"""Saves an event from Stova from the S3 bucket into a `StovaEvent`"""
5694
stova_event_id = record.get('id')
95+
96+
required_fields = self._required_fields()
97+
for field in required_fields:
98+
if record[field] is None or record[field] == '':
99+
logger.info(
100+
f'Stova Event with id {stova_event_id} does not have required field {field}. '
101+
'This stova event will not be processed into Data Hub.',
102+
)
103+
return
104+
105+
cleaned_record = self._convert_fields_from_null_to_blank(record)
106+
57107
values = {
58-
'stova_event_id': record.get('id'),
59-
'url': record.get('url', ''),
60-
'city': record.get('city', ''),
61-
'code': record.get('code', ''),
62-
'name': record.get('name', ''),
63-
'state': record.get('state', ''),
64-
'country': record.get('country', ''),
65-
'max_reg': record.get('max_reg'),
66-
'end_date': record.get('end_date'),
67-
'timezone': record.get('timezone', ''),
68-
'folder_id': record.get('folder_id'),
69-
'live_date': record.get('live_date'),
70-
'close_date': record.get('close_date'),
71-
'created_by': record.get('created_by', ''),
72-
'price_type': record.get('price_type', ''),
73-
'start_date': record.get('start_date'),
74-
'description': record.get('description', ''),
75-
'modified_by': record.get('modified_by', ''),
76-
'contact_info': record.get('contact_info', ''),
77-
'created_date': record.get('created_date'),
78-
'location_city': record.get('location_city', ''),
79-
'location_name': record.get('location_name', ''),
80-
'modified_date': record.get('modified_date'),
81-
'client_contact': record.get('client_contact', ''),
82-
'location_state': record.get('location_state', ''),
83-
'default_language': record.get('default_language', ''),
84-
'location_country': record.get('location_country', ''),
85-
'approval_required': record.get('approval_required'),
86-
'location_address1': record.get('location_address1', ''),
87-
'location_address2': record.get('location_address2', ''),
88-
'location_address3': record.get('location_address3', ''),
89-
'location_postcode': record.get('location_postcode', ''),
90-
'standard_currency': record.get('standard_currency', ''),
108+
'stova_event_id': cleaned_record.get('id'),
109+
'url': cleaned_record.get('url', ''),
110+
'city': cleaned_record.get('city', ''),
111+
'code': cleaned_record.get('code', ''),
112+
'name': cleaned_record.get('name', ''),
113+
'state': cleaned_record.get('state', ''),
114+
'country': cleaned_record.get('country', ''),
115+
'max_reg': cleaned_record.get('max_reg'),
116+
'end_date': cleaned_record.get('end_date'),
117+
'timezone': cleaned_record.get('timezone', ''),
118+
'folder_id': cleaned_record.get('folder_id'),
119+
'live_date': cleaned_record.get('live_date'),
120+
'close_date': cleaned_record.get('close_date'),
121+
'created_by': cleaned_record.get('created_by', ''),
122+
'price_type': cleaned_record.get('price_type', ''),
123+
'start_date': cleaned_record.get('start_date'),
124+
'description': cleaned_record.get('description', ''),
125+
'modified_by': cleaned_record.get('modified_by', ''),
126+
'contact_info': cleaned_record.get('contact_info', ''),
127+
'created_date': cleaned_record.get('created_date'),
128+
'location_city': cleaned_record.get('location_city', ''),
129+
'location_name': cleaned_record.get('location_name', ''),
130+
'modified_date': cleaned_record.get('modified_date'),
131+
'client_contact': cleaned_record.get('client_contact', ''),
132+
'location_state': cleaned_record.get('location_state', ''),
133+
'default_language': cleaned_record.get('default_language', ''),
134+
'location_country': cleaned_record.get('location_country', ''),
135+
'approval_required': cleaned_record.get('approval_required'),
136+
'location_address1': cleaned_record.get('location_address1', ''),
137+
'location_address2': cleaned_record.get('location_address2', ''),
138+
'location_address3': cleaned_record.get('location_address3', ''),
139+
'location_postcode': cleaned_record.get('location_postcode', ''),
140+
'standard_currency': cleaned_record.get('standard_currency', ''),
91141
}
92142

93143
try:

datahub/company_activity/tests/test_tasks/test_stova_ingestion_task.py

+62-7
Original file line numberDiff line numberDiff line change
@@ -109,9 +109,9 @@ def capture_envelope(self, envelope):
109109
self.events.append(envelope)
110110

111111

112+
@pytest.mark.django_db
112113
class TestStovaIngestionTasks:
113114

114-
@pytest.mark.django_db
115115
@mock_aws
116116
@override_settings(S3_LOCAL_ENDPOINT_URL=None)
117117
def test_stova_data_file_ingestion(self, caplog, test_file, test_file_path):
@@ -132,7 +132,6 @@ def test_stova_data_file_ingestion(self, caplog, test_file, test_file_path):
132132
assert StovaEvent.objects.count() == initial_stova_activity_count + 27
133133
assert IngestedObject.objects.count() == initial_ingested_count + 1
134134

135-
@pytest.mark.django_db
136135
@mock_aws
137136
@override_settings(S3_LOCAL_ENDPOINT_URL=None)
138137
def test_skip_previously_ingested_records(self, test_file_path, test_base_stova_event):
@@ -152,7 +151,6 @@ def test_skip_previously_ingested_records(self, test_file_path, test_base_stova_
152151
stova_event_ingestion_task(test_file_path)
153152
assert StovaEvent.objects.filter(stova_event_id=123456789).count() == 1
154153

155-
@pytest.mark.django_db
156154
@mock_aws
157155
@override_settings(S3_LOCAL_ENDPOINT_URL=None)
158156
def test_invalid_file(self, test_file_path):
@@ -169,7 +167,6 @@ def test_invalid_file(self, test_file_path):
169167
expected = "key: 'data-flow/exports/ExportAventriEvents/" 'stovaEventFake2.jsonl.gz'
170168
assert expected in exception
171169

172-
@pytest.mark.django_db
173170
def test_stova_event_fields_are_saved(self, test_base_stova_event):
174171
"""
175172
Test that the ingested stova event fields are saved to the StovaEvent model.
@@ -190,7 +187,6 @@ def test_stova_event_fields_are_saved(self, test_base_stova_event):
190187

191188
assert model_value == file_value
192189

193-
@pytest.mark.django_db
194190
def test_stova_event_fields_with_duplicate_attendee_ids_in_db(
195191
self, caplog, test_base_stova_event,
196192
):
@@ -208,7 +204,6 @@ def test_stova_event_fields_with_duplicate_attendee_ids_in_db(
208204
f'Record already exists for stova_event_id: {existing_stova_event.stova_event_id}'
209205
) in caplog.text
210206

211-
@pytest.mark.django_db
212207
def test_stova_event_fields_with_duplicate_attendee_ids_in_json(
213208
self, caplog, test_base_stova_event,
214209
):
@@ -229,7 +224,6 @@ def test_stova_event_fields_with_duplicate_attendee_ids_in_json(
229224
"Stova event id already exists.']" in caplog.text
230225
)
231226

232-
@pytest.mark.django_db
233227
def test_stova_event_ingestion_handles_unexpected_fields(self, caplog, test_base_stova_event):
234228
"""
235229
Test that if the rows from stova contain data in an unexpected data type these are handled
@@ -253,3 +247,64 @@ def test_stova_event_ingestion_handles_unexpected_fields(self, caplog, test_base
253247
) in caplog.text
254248

255249
assert 'approval_required' in caplog.text
250+
251+
@pytest.mark.parametrize(
    'required_field',
    ('id', 'name', 'location_address1', 'location_city'),
)
def test_stova_event_ingestion_rejects_event_if_missing_required_fields(
    self, caplog, test_base_stova_event, required_field,
):
    """
    A Stova Event that lacks a field Data Hub events require must not be ingested:
    the rejection is logged and no StovaEvent is saved.
    """
    record = test_base_stova_event
    # Null out the field under test; it is needed to save the event in Data Hub.
    record[required_field] = None

    task = StovaEventIngestionTask('dummy-prefix', mock.Mock())

    with caplog.at_level(logging.INFO):
        task._process_record(record)

    expected_message = (
        f'Stova Event with id {record.get("id")} does not have required field '
        f'{required_field}. This stova event will not be processed into Data Hub.'
    )
    assert expected_message in caplog.text
    assert StovaEvent.objects.count() == 0
283+
284+
@pytest.mark.parametrize(
    'null_field',
    (
        'location_address2',
        'location_address3',
        'location_state',
        'location_postcode',
        'description',
    ),
)
def test_stova_event_ingestion_converts_null_fields_to_empty_string(
    self, test_base_stova_event, null_field,
):
    """
    Fields which Data Hub Events need as an empty string (null is not accepted)
    are converted from None during ingestion and the event is still saved.
    """
    record = test_base_stova_event
    # Start from None so the test proves the conversion to an empty string.
    record[null_field] = None

    task = StovaEventIngestionTask('dummy-prefix', mock.Mock())
    task._process_record(record)

    assert StovaEvent.objects.count() == 1
    saved_event = StovaEvent.objects.first()
    assert getattr(saved_event, null_field) == ''

0 commit comments

Comments
 (0)