Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Serve SharePoint documents from generic view #5989

Merged
merged 7 commits into from
Mar 6, 2025
Merged
7 changes: 5 additions & 2 deletions config/api_urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from datahub.company_referral import urls as company_referral_urls
from datahub.dataset import urls as dataset_urls
from datahub.dnb_api import urls as dnb_api_urls
from datahub.documents import urls as document_urls
from datahub.event import urls as event_urls
from datahub.export_win import urls as export_win_urls
from datahub.feature_flag import urls as feature_flag_urls
Expand Down Expand Up @@ -106,7 +107,9 @@
include((investment_lead_urls, 'investment-lead'), namespace='investment-lead'),
),
path(
'company-activity/', include((company_activity_urls,
'company-activity'), namespace='company-activity'),
'company-activity/', include(
(company_activity_urls, 'company-activity'), namespace='company-activity',
),
),
path('document/', include((document_urls, 'document'), namespace='document')),
]
36 changes: 36 additions & 0 deletions datahub/documents/migrations/0006_sharepointdocument.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Generated by Django 4.2.19 on 2025-03-04 20:51

from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion
import uuid


class Migration(migrations.Migration):

    # Auto-generated migration that creates the table backing the
    # SharePointDocument model.
    #
    # It depends on the swappable user model (target of the archived_by,
    # created_by and modified_by foreign keys) and on the previous migration
    # of the documents app.
    dependencies = [
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
        ('documents', '0005_switch_to_booleanfield_with_null_kwarg'),
    ]

    operations = [
        migrations.CreateModel(
            name='SharePointDocument',
            fields=[
                # Audit timestamps: created_on is set on insert and indexed,
                # modified_on is refreshed on every save.
                ('created_on', models.DateTimeField(auto_now_add=True, db_index=True, null=True)),
                ('modified_on', models.DateTimeField(auto_now=True, null=True)),
                # Archiving state.
                ('archived', models.BooleanField(default=False)),
                ('archived_on', models.DateTimeField(blank=True, null=True)),
                ('archived_reason', models.TextField(blank=True, null=True)),
                # UUID primary key generated in Python via uuid.uuid4.
                ('id', models.UUIDField(default=uuid.uuid4, primary_key=True, serialize=False)),
                # Document details: optional title and mandatory URL.
                ('title', models.CharField(blank=True, default='', max_length=255)),
                ('url', models.URLField(max_length=255)),
                # User references are nulled (not cascaded) when the user is
                # deleted; related_name='+' disables the reverse accessor.
                ('archived_by', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='+', to=settings.AUTH_USER_MODEL)),
                ('created_by', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='+', to=settings.AUTH_USER_MODEL)),
                ('modified_by', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='+', to=settings.AUTH_USER_MODEL)),
            ],
            options={
                'abstract': False,
            },
        ),
    ]
39 changes: 39 additions & 0 deletions datahub/documents/migrations/0007_genericdocument.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Generated by Django 4.2.19 on 2025-03-04 20:51

from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion
import uuid


class Migration(migrations.Migration):

    # Auto-generated migration that creates the table backing the
    # GenericDocument model.
    #
    # Besides the user model and the preceding documents migration, it needs
    # the contenttypes app because both generic relations point at ContentType.
    dependencies = [
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
        ('contenttypes', '0002_remove_content_type_name'),
        ('documents', '0006_sharepointdocument'),
    ]

    operations = [
        migrations.CreateModel(
            name='GenericDocument',
            fields=[
                # Audit timestamps: created_on is set on insert and indexed,
                # modified_on is refreshed on every save.
                ('created_on', models.DateTimeField(auto_now_add=True, db_index=True, null=True)),
                ('modified_on', models.DateTimeField(auto_now=True, null=True)),
                # Archiving state.
                ('archived', models.BooleanField(default=False)),
                ('archived_on', models.DateTimeField(blank=True, null=True)),
                ('archived_reason', models.TextField(blank=True, null=True)),
                # UUID primary key generated in Python via uuid.uuid4.
                ('id', models.UUIDField(default=uuid.uuid4, primary_key=True, serialize=False)),
                # Object-id halves of the two generic relations; both are
                # UUIDs and both are required.
                ('document_object_id', models.UUIDField()),
                ('related_object_id', models.UUIDField()),
                # User references are nulled (not cascaded) when the user is
                # deleted; related_name='+' disables the reverse accessor.
                ('archived_by', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='+', to=settings.AUTH_USER_MODEL)),
                ('created_by', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='+', to=settings.AUTH_USER_MODEL)),
                # Content-type halves of the generic relations; deleting a
                # content type cascades to the rows that reference it.
                ('document_type', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='documents', to='contenttypes.contenttype')),
                ('modified_by', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='+', to=settings.AUTH_USER_MODEL)),
                ('related_object_type', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='related_documents', to='contenttypes.contenttype')),
            ],
            options={
                # Single composite index spanning both generic relations.
                'indexes': [models.Index(fields=['document_type', 'document_object_id', 'related_object_type', 'related_object_id'], name='documents_g_documen_2aba68_idx')],
            },
        ),
    ]
60 changes: 60 additions & 0 deletions datahub/documents/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
from logging import getLogger

from django.conf import settings
from django.contrib.contenttypes.fields import GenericForeignKey
from django.contrib.contenttypes.models import ContentType
from django.db import models, transaction
from django.utils.timezone import now

Expand Down Expand Up @@ -201,3 +203,61 @@ class AbstractEntityDocumentModel(BaseModel):

class Meta:
abstract = True


class SharePointDocument(BaseModel, ArchivableModel):
    """Model to represent documents in SharePoint.

    Stores only a reference to the document: an optional display title and
    the URL of the document. No file content is held by this model.
    """

    # UUID primary key generated in Python, not by the database.
    id = models.UUIDField(primary_key=True, default=uuid.uuid4)
    # Optional label; an empty string is stored when no title is supplied.
    title = models.CharField(max_length=settings.CHAR_FIELD_MAX_LENGTH, blank=True, default='')
    # Required link to the document; length is capped by the shared
    # CHAR_FIELD_MAX_LENGTH setting.
    url = models.URLField(max_length=settings.CHAR_FIELD_MAX_LENGTH)

    def __str__(self):
        """Return the document title (an empty string when no title is set)."""
        return self.title


class GenericDocument(BaseModel, ArchivableModel):
    """A single model to represent documents of varying types.

    The idea behind this model is to serve as a single interaction point for documents,
    irrespective of type. For example, those uploaded to an S3 bucket, or those stored in
    SharePoint. Each type of document will have different CRUD operations, but this model,
    along with its serializer and viewset, will enable all actions from a single endpoint.

    This model has two generic relations:
    1. To the type-specific document model instance (e.g. SharePointDocument or UploadableDocument)
    2. To the model instance the document relates to (e.g. Company, or InvestmentProject)
    """

    # UUID primary key generated in Python, not by the database.
    id = models.UUIDField(primary_key=True, default=uuid.uuid4)

    # Generic relation to type-specific document model instance.
    # `document_type` + `document_object_id` together identify the target row;
    # `document` is the accessor that resolves them to a model instance.
    document_type = models.ForeignKey(
        ContentType,
        on_delete=models.CASCADE,
        related_name='documents',
    )
    # The target's primary key must be a UUID for this column to hold it.
    document_object_id = models.UUIDField()
    document = GenericForeignKey('document_type', 'document_object_id')

    # Generic relation to model instance the document relates to.
    # Same content-type / object-id / accessor layout as above.
    related_object_type = models.ForeignKey(
        ContentType,
        on_delete=models.CASCADE,
        related_name='related_documents',
    )
    # The target's primary key must be a UUID for this column to hold it.
    related_object_id = models.UUIDField()
    related_object = GenericForeignKey('related_object_type', 'related_object_id')

    class Meta:
        # One composite index spanning the columns of both generic relations.
        indexes = [
            models.Index(fields=[
                'document_type',
                'document_object_id',
                'related_object_type',
                'related_object_id',
            ]),
        ]

    def __str__(self):
        """Return '<document> for <related object>', resolving both generic relations."""
        return f'{self.document} for {self.related_object}'
93 changes: 93 additions & 0 deletions datahub/documents/serializers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
from django.contrib.contenttypes.models import ContentType
from rest_framework import serializers

from datahub.company.models import (
Advisor,
Company,
)
from datahub.core.serializers import NestedRelatedField
from datahub.documents.models import (
GenericDocument,
SharePointDocument,
)
from datahub.documents.utils import format_content_type


class SharePointDocumentSerializer(serializers.ModelSerializer):
    """Serializer for SharePointDocument objects, exposing every model field."""

    # Advisers are rendered as nested objects rather than bare ids.
    # NOTE(review): presumably the nested object holds the id plus the listed
    # extra fields (name, email) - confirm against NestedRelatedField.
    created_by = NestedRelatedField(Advisor, extra_fields=['name', 'email'])
    modified_by = NestedRelatedField(Advisor, extra_fields=['name', 'email'])

    class Meta:
        model = SharePointDocument
        # Include all model fields in the serialized output.
        fields = '__all__'


class DocumentRelatedField(serializers.RelatedField):
    """Serializer field that renders the object behind GenericDocument.document.

    SharePointDocument is the only document type handled at the moment; any
    other instance is rejected with an exception.

    Supporting a further document type means adding another `isinstance`
    branch to `to_representation` that returns the data of the matching
    serializer, for example:

    ```
    if isinstance(instance, YourDocumentModel):
        return YourDocumentSerializer(instance).data
    ```
    """

    def to_representation(self, instance):
        """Return the serialized (JSON friendly) form of the document instance.

        Raises:
            Exception: if `instance` is not a supported document type.
        """
        # Guard first: reject anything that has no dedicated serializer.
        if not isinstance(instance, SharePointDocument):
            raise Exception(f'Unexpected document type: {type(instance)}')
        return SharePointDocumentSerializer(instance).data


class RelatedObjectRelatedField(serializers.RelatedField):
    """Serializer field for the GenericDocument.related_object field.

    Currently, only Company objects are supported.

    To add support for another type of related object, add the model to the tuple
    in the `isinstance` call in the to_representation method - e.g.
    `isinstance(instance, (Company, YourModel, ...))`. The model must contain the fields
    `id` and `name`, otherwise, you will need to add an elif statement and customise
    the return object accordingly.
    """

    def to_representation(self, instance):
        """Convert model instance to built-in Python (JSON friendly) data types.

        Returns:
            A dict with the related object's `id` (as a string) and `name`.

        Raises:
            Exception: if `instance` is not one of the supported model types.
        """
        # A real tuple, so supporting another model is a one-item addition.
        if isinstance(instance, (Company,)):
            return {
                'id': str(instance.id),
                'name': instance.name,
            }
        # The content type is only needed for the error message, so it is
        # looked up on the failure path only.
        content_type = ContentType.objects.get_for_model(instance)
        # Raise (rather than return) the exception: returning it would hand an
        # Exception object to the renderer as if it were field data.
        raise Exception(f'Unexpected type of related object: {content_type.model}')

Check warning on line 71 in datahub/documents/serializers.py

View check run for this annotation

Codecov / codecov/patch

datahub/documents/serializers.py#L71

Added line #L71 was not covered by tests


class GenericDocumentRetrieveSerializer(serializers.ModelSerializer):
    """Serializer used when reading GenericDocument objects.

    Both generic relations are rendered through read-only custom fields, and
    the two content-type fields are replaced with formatted values.
    """

    created_by = NestedRelatedField(Advisor, extra_fields=['name', 'email'])
    modified_by = NestedRelatedField(Advisor, extra_fields=['name', 'email'])
    document = DocumentRelatedField(read_only=True)
    related_object = RelatedObjectRelatedField(read_only=True)

    class Meta:
        model = GenericDocument
        fields = '__all__'

    def to_representation(self, instance):
        """Return the serialized instance with formatted content-type values."""
        data = super().to_representation(instance)
        # Overwrite the default content-type output with the formatted form.
        data['document_type'] = format_content_type(instance.document_type)
        data['related_object_type'] = format_content_type(instance.related_object_type)
        return data
37 changes: 36 additions & 1 deletion datahub/documents/test/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@

import factory

from datahub.company.test.factories import AdviserFactory
from datahub.company.test.factories import (
AdviserFactory,
CompanyFactory,
)
from datahub.documents.models import UploadStatus


Expand All @@ -22,3 +25,35 @@ class DocumentFactory(factory.django.DjangoModelFactory):

class Meta:
model = 'documents.Document'


class SharePointDocumentFactory(factory.django.DjangoModelFactory):
    """Generates a SharePointDocument instance with fake title and URL."""

    # Short fake text (at most 20 characters) and a fake URL.
    title = factory.Faker('text', max_nb_chars=20)
    url = factory.Faker('url')
    # Each audit user comes from its own AdviserFactory sub-factory.
    created_by = factory.SubFactory(AdviserFactory)
    modified_by = factory.SubFactory(AdviserFactory)

    class Meta:
        model = 'documents.SharePointDocument'


class CompanySharePointDocumentFactory(factory.django.DjangoModelFactory):
    """Generates a GenericDocument instance linking a Company to a SharePointDocument."""

    document = factory.SubFactory(SharePointDocumentFactory)
    related_object = factory.SubFactory(CompanyFactory)
    created_by = factory.SubFactory(AdviserFactory)
    modified_by = factory.SubFactory(AdviserFactory)
    archived = False

    class Meta:
        model = 'documents.GenericDocument'

    @factory.post_generation
    def sync_created_and_modified_on_document_instance(obj, create, extracted, **kwargs):  # noqa
        """Copy the audit fields of the generic document onto its linked document.

        The copied values are only persisted when the factory ran with the
        create strategy; with the build strategy nothing is written to the
        database.
        """
        obj.document.created_by = obj.created_by
        obj.document.modified_by = obj.modified_by
        obj.document.created_on = obj.created_on
        obj.document.modified_on = obj.modified_on
        # Only touch the database when the factory was asked to create rows;
        # a built (unsaved) object graph must not trigger a save.
        if create:
            # NOTE(review): the migration declares modified_on with
            # auto_now=True, so this save() presumably resets it to the
            # current time - confirm whether the copied value must survive.
            obj.document.save()
35 changes: 35 additions & 0 deletions datahub/documents/test/test_serializers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import pytest

from datahub.documents.serializers import (
GenericDocumentRetrieveSerializer,
SharePointDocumentSerializer,
)
from datahub.documents.test.factories import (
CompanySharePointDocumentFactory,
SharePointDocumentFactory,
)
from datahub.documents.utils import (
assert_retrieved_generic_document,
assert_retrieved_sharepoint_document,
)


pytestmark = pytest.mark.django_db


class TestSharePointDocumentSerializer:
    """Serialization checks for SharePointDocumentSerializer."""

    def test_serializing_instance_returns_expected_fields(self):
        """A factory-made document serializes to the expected payload."""
        instance = SharePointDocumentFactory()
        serialized = SharePointDocumentSerializer(instance).data
        assert_retrieved_sharepoint_document(instance, serialized)


class TestGenericDocumentRetrieveSerializer:
    """Serialization checks for GenericDocumentRetrieveSerializer."""

    def test_serializing_instance_returns_expected_fields(self):
        """A company-to-SharePoint generic document serializes to the expected payload."""
        instance = CompanySharePointDocumentFactory()
        serialized = GenericDocumentRetrieveSerializer(instance).data
        assert_retrieved_generic_document(instance, serialized)
16 changes: 16 additions & 0 deletions datahub/documents/test/test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import pytest

from django.contrib.contenttypes.models import ContentType

from datahub.documents.models import GenericDocument
from datahub.documents.utils import format_content_type


pytestmark = pytest.mark.django_db


def test_format_content_type():
    """format_content_type renders a content type as '<app_label>.<model>'."""
    ct = ContentType.objects.get_for_model(GenericDocument)
    assert format_content_type(ct) == f'{ct.app_label}.{ct.model}'
Loading