Skip to content

Commit 29c4ceb

Browse files
authored
Merge pull request #5811 from uktrade/fix-ingestion-great-company-registration-number
Fix ingestion of great-company-registration-number
2 parents 12ff05e + 2bb6899 commit 29c4ceb

File tree

2 files changed

+28
-7
lines changed

2 files changed

+28
-7
lines changed

datahub/company_activity/tasks/ingest_great_data.py

+11-2
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,13 @@ def ingest_great_data(bucket, file):
1919
logger.info(f'Ingesting file: {file} finished')
2020

2121

22+
def validate_company_registration_number(company_registration_number):
    """
    Normalise a company registration number taken from ingested data.

    The value may arrive as a string or as a number, so it is converted to
    a string before being checked.

    :param company_registration_number: raw value from the ingested record;
        may be ``None`` when the key is missing or null in the source data.
    :returns: the value as a string, or ``None`` when the value is missing
        or is longer than 10 characters.
    """
    # A missing value must stay None: str(None) would yield the literal
    # text 'None', which passes the length check below and would then be
    # stored or looked up as if it were a real registration number.
    if company_registration_number is None:
        return None

    company_registration_number_str = str(company_registration_number)

    # Over-long values are discarded rather than truncated.
    # NOTE(review): the limit of 10 presumably mirrors the maximum length of
    # the company number field on the Company model - confirm.
    if len(company_registration_number_str) > 10:
        return None

    return company_registration_number_str
27+
28+
2229
class GreatIngestionTask:
2330
def __init__(self):
2431
self._countries = None
@@ -45,7 +52,9 @@ def _already_ingested(self, id):
4552
def _create_company(self, data, form_id):
4653
company = Company.objects.create(
4754
name=data.get('business_name', ''),
48-
company_number=data.get('company_registration_number', ''),
55+
company_number=validate_company_registration_number(
56+
data.get('company_registration_number', ''),
57+
),
4958
turnover_range=self._get_turnover_range(data.get('annual_turnover')),
5059
business_type=self._get_business_type(data.get('type')),
5160
employee_range=self._get_business_size(data.get('number_of_employees')),
@@ -72,7 +81,7 @@ def _create_contact(self, data, company, form_id):
7281

7382
def _get_company(self, data, form_id):
7483
company = self._get_company_by_companies_house_num(
75-
data.get('company_registration_number'),
84+
validate_company_registration_number(data.get('company_registration_number')),
7685
)
7786
if company:
7887
return company

datahub/company_activity/tests/test_tasks/test_great_ingestion_task.py

+17-5
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@
1515
from datahub.company_activity.models import GreatExportEnquiry, IngestedFile
1616
from datahub.company_activity.tasks.constants import BUCKET, GREAT_PREFIX, REGION
1717
from datahub.company_activity.tasks.ingest_great_data import (
18-
GreatIngestionTask, ingest_great_data,
18+
GreatIngestionTask,
19+
ingest_great_data,
1920
)
2021
from datahub.company_activity.tests.factories import (
2122
GreatExportEnquiryFactory,
@@ -25,7 +26,9 @@
2526

2627
@pytest.fixture
2728
def test_file():
28-
filepath = 'datahub/company_activity/tests/test_tasks/fixtures/great/20241023T000346.jsonl.gz'
29+
filepath = (
30+
'datahub/company_activity/tests/test_tasks/fixtures/great/20241023T000346.jsonl.gz'
31+
)
2932
return open(filepath, 'rb')
3033

3134

@@ -164,7 +167,7 @@ def test_company_name_mapping(self):
164167
"id": "5250",
165168
"created_at": "2024-09-19T14:00:34.069",
166169
"data": {{
167-
"company_registration_number": 994349,
170+
"company_registration_number": "994349",
168171
"business_name": "{company.name}"
169172
}}
170173
}}
@@ -563,7 +566,7 @@ def test_boolean_field_mapping(self):
563566

564567
@pytest.mark.django_db
565568
@mock_aws
566-
def test_long_field_values(self, test_file_path):
569+
def test_long_field_values(self):
567570
"""
568571
Test that we can ingest records with long field values
569572
"""
@@ -575,6 +578,7 @@ def test_long_field_values(self, test_file_path):
575578
'that either need to be stored as TextFields if we need'
576579
'the full value or truncated if we do not. Long long long.'
577580
)
581+
578582
data = f"""
579583
{{
580584
"id": "5249",
@@ -589,10 +593,18 @@ def test_long_field_values(self, test_file_path):
589593
"product_or_service_2": "{long_text}",
590594
"product_or_service_3": "{long_text}",
591595
"product_or_service_4": "{long_text}",
592-
"product_or_service_5": "{long_text}"
596+
"product_or_service_5": "{long_text}",
597+
"company_registration_number": "{long_text}"
598+
593599
}}
594600
}}
595601
"""
596602
task = GreatIngestionTask()
597603
task.json_to_model(json.loads(data))
598604
assert GreatExportEnquiry.objects.count() == initial_count + 1
605+
606+
result = GreatExportEnquiry.objects.get(form_id='5249').company_id
607+
608+
company_result = Company.objects.get(id=result)
609+
610+
assert company_result.company_number is None

0 commit comments

Comments
 (0)