Skip to content

Commit 6484739

Browse files
authored
Merge pull request #270 from uktrade/release/huangjiu
Release Huangjiu
2 parents a81f5b9 + e89452b commit 6484739

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

67 files changed

+2385
-318
lines changed

README.rst

+17-1
Original file line numberDiff line numberDiff line change
@@ -161,10 +161,13 @@ Leeloo can run on any Heroku style platform. These environment variables MUST be
161161
- DJANGO_SECRET_KEY
162162
- DJANGO_SENTRY_DSN
163163
- DJANGO_SETTINGS_MODULE
164-
- BULK_CREATE_BATCH_SIZE (default=50000)
164+
- BULK_CREATE_BATCH_SIZE (default=5000)
165165
- ES_HOST
166166
- ES_PORT
167167
- ES_INDEX
168+
- AWS_ACCESS_KEY_ID
169+
- AWS_SECRET_ACCESS_KEY
170+
- DOCUMENTS_BUCKET
168171

169172

170173
Management commands
@@ -195,3 +198,16 @@ Load metadata::
195198

196199
docker-compose run leeloo python manage.py loaddata /app/fixtures/metadata.yaml
197200
docker-compose run leeloo python manage.py loaddata /app/fixtures/datahub_businesstypes.yaml
201+
202+
Dependencies
203+
------------
204+
Direct dependencies are specified in ``requirements.in``. ``requirements.txt`` is a lock file generated using `pip-compile
205+
(from pip-tools) <https://github.com/jazzband/pip-tools>`_ and should not be manually edited.
206+
207+
To update the lock file and indirect dependencies, run::
208+
209+
pip-compile --upgrade --output-file requirements.txt requirements.in
210+
211+
This must be run whenever ``requirements.in`` is edited.
212+
213+
Dependencies should still be installed using ``requirements.txt``.

config/api_urls.py

+2
Original file line numberDiff line numberDiff line change
@@ -34,5 +34,7 @@
3434
url(r'^', include((investment_urls, 'investment'), namespace='investment')),
3535
url(r'^', include((leads_urls, 'business-leads'), namespace='business-leads')),
3636
url(r'^', include((company_urls.contact_urls, 'contact'), namespace='contact')),
37+
url(r'^', include((company_urls.company_urls, 'company'), namespace='company')),
38+
url(r'^', include((company_urls.ch_company_urls, 'ch-company'), namespace='ch-company')),
3739
url(r'^', include((search_urls, 'search'), namespace='search'))
3840
]

config/settings/common.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
LOCAL_APPS = [
5252
'datahub.core',
5353
'datahub.company',
54+
'datahub.documents',
5455
'datahub.interaction',
5556
'datahub.investment',
5657
'datahub.leads',
@@ -149,7 +150,7 @@
149150
'DEFAULT_VERSIONING_CLASS': 'rest_framework.versioning.NamespaceVersioning',
150151
'DEFAULT_PAGINATION_CLASS': 'rest_framework.pagination.LimitOffsetPagination',
151152
'PAGE_SIZE': 100,
152-
'DEFAULT_AUTHENTICATION_CLASSES': ['oauth2_provider.ext.rest_framework.OAuth2Authentication'],
153+
'DEFAULT_AUTHENTICATION_CLASSES': ['oauth2_provider.contrib.rest_framework.OAuth2Authentication'],
153154
'DEFAULT_PERMISSION_CLASSES': ['rest_framework.permissions.IsAuthenticated'],
154155
}
155156

@@ -168,4 +169,5 @@
168169
CDMS_TEXT_MAX_LENGTH = 4000
169170
CHAR_FIELD_MAX_LENGTH = 255
170171
HEROKU = False
171-
BULK_CREATE_BATCH_SIZE = env.int('BULK_CREATE_BATCH_SIZE', default=50000)
172+
BULK_CREATE_BATCH_SIZE = env.int('BULK_CREATE_BATCH_SIZE', default=5000)
173+
DOCUMENTS_BUCKET = env('DOCUMENTS_BUCKET')

config/settings/common_sentry.py

+54
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
from config.settings.common import *
2+
3+
MIDDLEWARE.append('raven.contrib.django.raven_compat.middleware.SentryResponseErrorIdMiddleware')
4+
INSTALLED_APPS.append('raven.contrib.django.raven_compat')
5+
6+
SENTRY_DSN = env('DJANGO_SENTRY_DSN')
7+
8+
# Logging
9+
LOGGING = {
10+
'version': 1,
11+
'disable_existing_loggers': False,
12+
'root': {
13+
'level': 'INFO',
14+
'handlers': ['sentry', 'console'],
15+
},
16+
'formatters': {
17+
'verbose': {
18+
'format': '%(asctime)s [%(levelname)s] [%(name)s] %(message)s'
19+
},
20+
},
21+
'handlers': {
22+
'sentry': {
23+
'level': 'ERROR',
24+
'class': 'raven.contrib.django.raven_compat.handlers.SentryHandler'
25+
},
26+
'console': {
27+
'level': 'DEBUG',
28+
'class': 'logging.StreamHandler',
29+
'formatter': 'verbose'
30+
},
31+
},
32+
'loggers': {
33+
'django.db.backends': {
34+
'level': 'ERROR',
35+
'handlers': ['console'],
36+
'propagate': False,
37+
},
38+
'raven': {
39+
'level': 'DEBUG',
40+
'handlers': ['console'],
41+
'propagate': False,
42+
},
43+
'sentry.errors': {
44+
'level': 'DEBUG',
45+
'handlers': ['console'],
46+
'propagate': False,
47+
},
48+
},
49+
}
50+
51+
52+
RAVEN_CONFIG = {
53+
'DSN': SENTRY_DSN,
54+
}

config/settings/dev.py

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
from .common_sentry import *
2+
3+
# DRF
4+
REST_FRAMEWORK['DEFAULT_AUTHENTICATION_CLASSES'] += ['rest_framework.authentication.SessionAuthentication']

config/settings/local.py

+39
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,42 @@
2424
],
2525
'SHOW_TEMPLATE_CONTEXT': True,
2626
}
27+
28+
# This gets normal Python logging working with Django
29+
LOGGING = {
30+
'version': 1,
31+
'disable_existing_loggers': True,
32+
'root': {
33+
'level': 'INFO',
34+
'handlers': ['console'],
35+
},
36+
'formatters': {
37+
'verbose': {
38+
'format': '[%(levelname)s] [%(name)s] %(message)s'
39+
},
40+
},
41+
'handlers': {
42+
'console': {
43+
'level': 'DEBUG',
44+
'class': 'logging.StreamHandler',
45+
'formatter': 'verbose'
46+
},
47+
},
48+
'loggers': {
49+
'werkzeug': {
50+
'handlers': ['console'],
51+
'level': 'INFO',
52+
'propagate': False
53+
},
54+
'django': {
55+
'handlers': ['console'],
56+
'level': 'INFO',
57+
'propagate': False
58+
},
59+
'django.server': {
60+
'handlers': ['console'],
61+
'level': 'INFO',
62+
'propagate': False,
63+
},
64+
},
65+
}

config/settings/production.py

+1-58
Original file line numberDiff line numberDiff line change
@@ -1,60 +1,3 @@
1-
from .common import *
1+
from .common_sentry import *
22

3-
MIDDLEWARE += ('raven.contrib.django.raven_compat.middleware.SentryResponseErrorIdMiddleware',)
4-
INSTALLED_APPS += ('raven.contrib.django.raven_compat',)
5-
6-
# Logging
7-
SENTRY_DSN = env('DJANGO_SENTRY_DSN')
8-
LOGGING = {
9-
'version': 1,
10-
'disable_existing_loggers': True,
11-
'root': {
12-
'level': 'WARNING',
13-
'handlers': ['sentry'],
14-
},
15-
'formatters': {
16-
'verbose': {
17-
'format': '%(levelname)s %(asctime)s %(module)s '
18-
'%(process)d %(thread)d %(message)s'
19-
},
20-
},
21-
'handlers': {
22-
'sentry': {
23-
'level': 'ERROR',
24-
'class': 'raven.contrib.django.raven_compat.handlers.SentryHandler',
25-
},
26-
'console': {
27-
'level': 'DEBUG',
28-
'class': 'logging.StreamHandler',
29-
'formatter': 'verbose'
30-
}
31-
},
32-
'loggers': {
33-
'django.db.backends': {
34-
'level': 'ERROR',
35-
'handlers': ['console'],
36-
'propagate': False,
37-
},
38-
'raven': {
39-
'level': 'DEBUG',
40-
'handlers': ['console'],
41-
'propagate': False,
42-
},
43-
'sentry.errors': {
44-
'level': 'DEBUG',
45-
'handlers': ['console'],
46-
'propagate': False,
47-
},
48-
'django.security.DisallowedHost': {
49-
'level': 'ERROR',
50-
'handlers': ['console', 'sentry'],
51-
'propagate': False,
52-
},
53-
},
54-
}
55-
56-
57-
RAVEN_CONFIG = {
58-
'DSN': SENTRY_DSN,
59-
}
603
STATICFILES_STORAGE = 'whitenoise.django.GzipManifestStaticFilesStorage'

config/settings/sample.env

+4
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,7 @@ ES_INDEX=test
77
DATAHUB_SECRET=secret
88
CDMS_AUTH_URL=http://example.com
99
DOCKER_DEV=True
10+
AWS_DEFAULT_REGION=eu-west-2
11+
AWS_ACCESS_KEY_ID=foo
12+
AWS_SECRET_ACCESS_KEY=bar
13+
DOCUMENTS_BUCKET=baz

config/settings/staging.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from .common import *
1+
from .common_sentry import *
22

33
# DRF
44
REST_FRAMEWORK['DEFAULT_AUTHENTICATION_CLASSES'] += ['rest_framework.authentication.SessionAuthentication']

datahub/company/admin.py

+8-6
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,15 @@ class CompanyAdmin(VersionAdmin):
1111
"""Company admin."""
1212

1313
search_fields = ['name', 'id', 'company_number']
14+
raw_id_fields = ('parent', 'one_list_account_owner', 'archived_by')
1415

1516

1617
@admin.register(Contact)
1718
class ContactAdmin(VersionAdmin):
1819
"""Contact admin."""
1920

2021
search_fields = ['first_name', 'last_name', 'company__name']
22+
raw_id_fields = ('company', 'adviser', 'archived_by')
2123

2224

2325
@admin.register(CompaniesHouseCompany)
@@ -57,7 +59,7 @@ class AdviserAdmin(VersionAdmin, UserAdmin):
5759
}),
5860
('Permissions', {
5961
'fields': (
60-
'enabled',
62+
'use_cdms_auth',
6163
'is_active',
6264
'is_staff',
6365
'is_superuser',
@@ -78,10 +80,10 @@ class AdviserAdmin(VersionAdmin, UserAdmin):
7880
'fields': ('email', 'password1', 'password2'),
7981
}),
8082
)
81-
list_display = ('email', 'first_name', 'last_name', 'is_staff', 'enabled')
83+
list_display = ('email', 'first_name', 'last_name', 'is_staff', 'use_cdms_auth')
8284
search_fields = ('first_name', 'last_name', 'email')
83-
ordering = ('email', 'enabled')
84-
list_filter = ('enabled',)
85+
ordering = ('email', 'use_cdms_auth')
86+
list_filter = ('use_cdms_auth',)
8587
actions = ['enable_users', 'disable_users']
8688

8789
def reversion_register(self, model, **kwargs):
@@ -91,12 +93,12 @@ def reversion_register(self, model, **kwargs):
9193

9294
def enable_users(self, request, queryset):
9395
"""Enable users for login."""
94-
queryset.update(enabled=True)
96+
queryset.update(use_cdms_auth=True)
9597

9698
enable_users.short_description = 'Enable users'
9799

98100
def disable_users(self, request, queryset):
99101
"""Disable users for login."""
100-
queryset.update(enabled=False)
102+
queryset.update(use_cdms_auth=False)
101103

102104
disable_users.short_description = 'Disable users.'

datahub/company/management/commands/manageusers.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -39,4 +39,4 @@ def handle(self, *args, **options):
3939
enabled = True
4040
else:
4141
enabled = False
42-
user_model.objects.filter(email__in=options['users']).update(enabled=enabled)
42+
user_model.objects.filter(email__in=options['users']).update(use_cdms_auth=enabled)

datahub/company/management/commands/sync_ch.py

+26-14
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,11 @@
1111
import requests
1212
from django.conf import settings
1313
from django.core.management.base import BaseCommand
14-
from django.db import connection, transaction
14+
from django.db import connection, reset_queries, transaction
1515
from lxml import etree
16-
from raven.contrib.django.raven_compat.models import client
1716

1817
from datahub.company.models import CompaniesHouseCompany
19-
from datahub.core.utils import log_and_ignore_exceptions, slice_iterable_into_chunks, stream_to_file_pointer
18+
from datahub.core.utils import slice_iterable_into_chunks, stream_to_file_pointer
2019

2120
logger = getLogger(__name__)
2221

@@ -73,6 +72,7 @@ def open_ch_zipped_csv(fp):
7372

7473
def iter_ch_csv_from_url(url, tmp_file_creator):
7574
"""Fetch & cache CH zipped CSV, and then iterate though contents."""
75+
logger.info('Loading CSV from URL: %s', url)
7676
with tmp_file_creator() as tf:
7777
stream_to_file_pointer(url, tf)
7878
tf.seek(0, 0)
@@ -90,18 +90,32 @@ def sync_ch(tmp_file_creator, endpoint=None, truncate_first=False):
9090
https://github.com/django/django/blob/master/django/db/models/query.py#L420
9191
this would create a list with millions of objects, which would then be saved in batches within a single transaction
9292
"""
93+
logger.info('Starting CH load...')
94+
count = 0
9395
endpoint = endpoint or settings.CH_DOWNLOAD_URL
9496
ch_csv_urls = get_ch_latest_dump_file_list(endpoint)
97+
logger.info('Found the following Companies House CSV URLs: %s', ch_csv_urls)
9598
if truncate_first:
9699
truncate_ch_companies_table()
97100
for csv_url in ch_csv_urls:
98101
ch_company_rows = iter_ch_csv_from_url(csv_url, tmp_file_creator)
99-
for batchiter in slice_iterable_into_chunks(ch_company_rows, settings.BULK_CREATE_BATCH_SIZE):
100-
objects = [CompaniesHouseCompany(**ch_company_row) for ch_company_row in batchiter if ch_company_row]
102+
103+
batch_iter = slice_iterable_into_chunks(
104+
ch_company_rows, settings.BULK_CREATE_BATCH_SIZE, _create_ch_company
105+
)
106+
for batch in batch_iter:
101107
CompaniesHouseCompany.objects.bulk_create(
102-
objs=objects,
108+
objs=batch,
103109
batch_size=settings.BULK_CREATE_BATCH_SIZE
104110
)
111+
count += len(batch)
112+
logger.info('%d Companies House records loaded...', count)
113+
# In debug mode, Django keeps track of SQL statements executed which
114+
# eventually leads to memory exhaustion.
115+
# This clears that history.
116+
reset_queries()
117+
118+
logger.info('Companies House load complete, %s records loaded', count)
105119

106120

107121
@transaction.atomic
@@ -112,20 +126,18 @@ def truncate_ch_companies_table():
112126
"""
113127
cursor = connection.cursor()
114128
table_name = CompaniesHouseCompany._meta.db_table
129+
logger.info('Truncating the %s table', table_name)
115130
query = f'truncate {table_name};'
116131
cursor.execute(query)
117132

118133

134+
def _create_ch_company(row_dict):
135+
return CompaniesHouseCompany(**row_dict)
136+
137+
119138
class Command(BaseCommand):
120139
"""Companies House sync command."""
121140

122141
def handle(self, *args, **options):
123142
"""Handle."""
124-
try:
125-
sync_ch(tmp_file_creator=tempfile.TemporaryFile, truncate_first=True)
126-
except Exception as e:
127-
with log_and_ignore_exceptions():
128-
client.captureException()
129-
130-
logger.exception('Failed to sync from ES')
131-
self.stderr.write(e)
143+
sync_ch(tmp_file_creator=tempfile.TemporaryFile, truncate_first=True)

datahub/company/migrations/0001_squash_0030_initial.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ class Migration(migrations.Migration):
116116
('future_interest_countries', models.ManyToManyField(blank=True, related_name='company_future_interest_countries', to='metadata.Country')),
117117
('headquarter_type', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to='metadata.HeadquarterType')),
118118
('one_list_account_owner', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='one_list_owned_companies', to=settings.AUTH_USER_MODEL)),
119-
('parent', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='subsidiaries', to='company.Company')),
119+
('parent', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='children', to='company.Company')),
120120
('registered_address_country', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='companys', to='metadata.Country')),
121121
('sector', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, to='metadata.Sector')),
122122
('trading_address_country', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='company_trading_address_country', to='metadata.Country')),

0 commit comments

Comments
 (0)