Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fmd 366 add dataset lineage link #416

Merged
merged 20 commits into from
Jun 13, 2024
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions home/service/details.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import os
from data_platform_catalogue.entities import RelationshipType
from data_platform_catalogue.search_types import MultiSelectFilter, ResultType
from django.core.exceptions import ObjectDoesNotExist
from urllib.parse import urlsplit

from .base import GenericService

Expand Down Expand Up @@ -82,13 +84,34 @@ def __init__(self, urn: str):
self.context = self._get_context()

def _get_context(self):
    """
    Assemble the template context for the table details page.

    The lineage link is built from the scheme and host of the catalogue URL
    (taken from the CATALOGUE_URL environment variable, falling back to the
    test catalogue address) combined with the urn of the table.
    """
    catalogue_url = os.getenv("CATALOGUE_URL", "https://test-catalogue.gov.uk")
    url_parts = urlsplit(catalogue_url)
    # Any path, query or fragment on the configured URL is dropped; only
    # scheme and network location are reused for the link.
    datahub_base = f"{url_parts.scheme}://{url_parts.netloc}"

    context = {
        "table": self.table_metadata,
        "parent_entity": self.parent_entity,
        "dataset_parent_type": self.dataset_parent_type,
        "h1_value": "Details",
        "has_lineage": self.get_has_lineage(),
        "lineage_url": f"{datahub_base}/dataset/{self.table_metadata.urn}/Lineage?is_lineage_mode=true&",
    }
    return context

def get_has_lineage(self) -> bool:
    """
    Report whether the dataset has any lineage recorded in datahub.

    Inspects the relationships property of the Table model and looks up the
    entries stored under RelationshipType.DATA_LINEAGE.

    Returns:
        True when at least one lineage relationship is present, otherwise
        False (including when no lineage entry exists at all).
    """
    # NOTE(review): `or {}` guards against relationships being unset (None) -
    # confirm against the Table model whether that can happen.
    relationships = self.table_metadata.relationships or {}
    lineage_entities = relationships.get(RelationshipType.DATA_LINEAGE, [])
    # An empty collection is falsy, so no explicit length comparison is needed.
    return bool(lineage_entities)


class ChartDetailsService(GenericService):
def __init__(self, urn: str):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -232,21 +232,23 @@ def get_table_details(self, urn) -> Table:
created, modified = parse_created_and_modified(properties)
name, display_name, qualified_name = parse_names(response, properties)

# A dataset can't have multiple parents, but if we did
# start to use that we'd need to change this
if response["container_relations"]["total"] > 0:
relations = parse_relations(
RelationshipType.PARENT, response["container_relations"]
)
else:
relations = {}
lineage_relations = parse_relations(
RelationshipType.DATA_LINEAGE,
[
response.get("downstream_lineage_relations", {}),
response.get("upstream_lineage_relations", {}),
],
)
parent_relations = parse_relations(
RelationshipType.PARENT, [response["parent_container_relations"]]
)
return Table(
urn=urn,
display_name=display_name,
name=name,
fully_qualified_name=qualified_name,
description=properties.get("description", ""),
relationships=relations,
relationships={**lineage_relations, **parent_relations},
domain=domain,
governance=Governance(
data_owner=owner,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,50 @@ query getDatasetDetails($urn: String!) {
}
}
}
downstream_lineage_relations: lineage (
input: {direction: DOWNSTREAM
start:0,
count:10}
) {
total
relationships{
type
entity{
urn
... on Dataset {
name
properties{
name
}
}
type
}
}
}
upstream_lineage_relations: lineage (
input: {direction: UPSTREAM
start:0,
count:10}
) {
total
relationships{
type
entity{
urn
... on Dataset {
name
properties{
name
}
}
type
}
}
}
subTypes {
typeNames
}
container_relations: relationships(
parent_container_relations: relationships(
input: { types: ["IsPartOf"], direction: OUTGOING, count: 10 }
) {
total
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,25 +99,6 @@ query Search(
subTypes {
typeNames
}
relationships(
input: {
types: ["DataProductContains"]
direction: INCOMING
count: 10
}
) {
total
relationships {
entity {
urn
... on DataProduct {
properties {
name
}
}
}
}
}
ownership {
owners {
owner {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -235,21 +235,26 @@ def parse_columns(entity: dict[str, Any]) -> list[Column]:


def parse_relations(
    relationship_type: RelationshipType, relations_list: list[dict]
) -> dict[RelationshipType, list[EntityRef]]:
    """
    Parse the relationship results returned from one or more graphql queries.

    Args:
        relationship_type: the key under which the parsed entities are
            returned.
        relations_list: relationship result dicts, each holding its entries
            under a "relationships" key. Results that are empty, None, or
            lack that key are skipped rather than raising.

    Returns:
        A single-entry dict mapping relationship_type to the EntityRef
        objects built from every entry across all of relations_list.

    Raises:
        KeyError: if a relationship entry has no "entity" key.
    """
    # We may want to do something with total relations if we are returning child
    # relations and need to paginate through relations - 10 relations returned as is.
    # There may be more than 10 lineage entities but since we currently only care
    # if lineage exists for a dataset we don't need to capture everything.
    related_entities = []
    for relations in relations_list:
        # Callers may pass {} (e.g. a `.get(..., {})` default) when the query
        # returned nothing for this relation, so the key can be absent.
        for relationship in (relations or {}).get("relationships") or []:
            entity = relationship["entity"]
            properties = entity.get("properties")
            # Prefer the name held in properties; fall back to the entity's
            # own name when the properties block is missing.
            display_name = (
                properties.get("name")
                if properties is not None
                else entity.get("name")
            )
            related_entities.append(
                EntityRef(urn=entity.get("urn"), display_name=display_name)
            )

    return {relationship_type: related_entities}
5 changes: 0 additions & 5 deletions lib/datahub-client/data_platform_catalogue/client/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
parse_names,
parse_owner,
parse_properties,
parse_relations,
parse_tags,
)
from data_platform_catalogue.entities import RelationshipType
Expand Down Expand Up @@ -256,16 +255,12 @@ def _parse_result(
last_modified = parse_last_modified(entity)
name, display_name, qualified_name = parse_names(entity, properties)

relations = parse_relations(
RelationshipType.PARENT, entity.get("relationships", {})
)
domain = parse_domain(entity)

metadata = {
"owner": owner.display_name,
"owner_email": owner.email,
"total_parents": entity.get("relationships", {}).get("total", 0),
"parents": relations[RelationshipType.PARENT],
"domain_name": domain.display_name,
"domain_id": domain.urn,
"entity_types": self._parse_types_and_sub_types(entity, "Dataset"),
Expand Down
1 change: 1 addition & 0 deletions lib/datahub-client/data_platform_catalogue/entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
class RelationshipType(Enum):
PARENT = "PARENT"
PLATFORM = "PLATFORM"
DATA_LINEAGE = "DATA_LINEAGE"


class EntityRef(BaseModel):
Expand Down
23 changes: 16 additions & 7 deletions lib/datahub-client/tests/client/datahub/test_datahub_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,17 +249,22 @@ def test_get_dataset(
urn = "abc"
datahub_response = {
"dataset": {
"type": "DATASET",
"platform": {"name": "datahub"},
"ownership": None,
"subTypes": None,
"container_relations": {
"downstream_lineage_relations": {"total": 0, "relationships": []},
"upstream_lineage_relations": {"total": 0, "relationships": []},
"parent_container_relations": {
"total": 1,
"relationships": [
{
"type": "IsPartOf",
"direction": "OUTGOING",
"entity": {
"urn": "urn:li:container:database",
"properties": {"name": "database"},
}
},
}
],
},
Expand Down Expand Up @@ -332,7 +337,8 @@ def test_get_dataset(
relationships={
RelationshipType.PARENT: [
EntityRef(urn="urn:li:container:database", display_name="database")
]
],
RelationshipType.DATA_LINEAGE: [],
},
domain=DomainRef(display_name="", urn=""),
governance=Governance(
Expand Down Expand Up @@ -376,9 +382,9 @@ def test_get_dataset_minimal_properties(
"platform": {"name": "datahub"},
"name": "notinproperties",
"properties": {},
"container_relations": {
"total": 0,
},
"downstream_lineage_relations": {"total": 0, "relationships": []},
"upstream_lineage_relations": {"total": 0, "relationships": []},
"parent_container_relations": {"total": 0, "relationships": []},
"data_product_relations": {"total": 0, "relationships": []},
"schemaMetadata": {"fields": []},
}
Expand All @@ -397,7 +403,10 @@ def test_get_dataset_minimal_properties(
name="notinproperties",
fully_qualified_name="notinproperties",
description="",
relationships={},
relationships={
RelationshipType.PARENT: [],
RelationshipType.DATA_LINEAGE: [],
},
domain=DomainRef(display_name="", urn=""),
governance=Governance(
data_owner=OwnerRef(display_name="", email="", urn=""),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ def test_parse_relations():
],
}
}
result = parse_relations(RelationshipType.PARENT, relations["relationships"])
result = parse_relations(RelationshipType.PARENT, [relations["relationships"]])
assert result == {
RelationshipType.PARENT: [
EntityRef(urn="urn:li:dataProduct:test", display_name="test")
Expand All @@ -197,7 +197,7 @@ def test_parse_relations():

def test_parse_relations_blank():
    """A relationship result with no entries parses to an empty parent list."""
    relations = {"relationships": {"total": 0, "relationships": []}}
    # parse_relations takes a list of relationship results, so the single
    # result is wrapped in a list.
    result = parse_relations(RelationshipType.PARENT, [relations["relationships"]])
    assert result == {RelationshipType.PARENT: []}


Expand Down
9 changes: 0 additions & 9 deletions lib/datahub-client/tests/client/datahub/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,6 @@ def test_one_search_result(mock_graph, searcher):
"owner": "",
"owner_email": "",
"total_parents": 0,
"parents": [],
"domain_name": "HMPPS",
"domain_id": "urn:li:domain:3dc18e48-c062-4407-84a9-73e23f768023",
"entity_types": {
Expand Down Expand Up @@ -210,7 +209,6 @@ def test_dataset_result(mock_graph, searcher):
"owner": "",
"owner_email": "",
"total_parents": 0,
"parents": [],
"domain_name": "HMPPS",
"domain_id": "urn:li:domain:3dc18e48-c062-4407-84a9-73e23f768023",
"entity_types": {
Expand Down Expand Up @@ -302,7 +300,6 @@ def test_full_page(mock_graph, searcher):
"owner": "",
"owner_email": "",
"total_parents": 0,
"parents": [],
"domain_name": "",
"domain_id": "",
"entity_types": {
Expand Down Expand Up @@ -334,7 +331,6 @@ def test_full_page(mock_graph, searcher):
"owner": "",
"owner_email": "",
"total_parents": 0,
"parents": [],
"domain_name": "",
"domain_id": "",
"entity_types": {
Expand Down Expand Up @@ -364,7 +360,6 @@ def test_full_page(mock_graph, searcher):
"owner": "",
"owner_email": "",
"total_parents": 0,
"parents": [],
"domain_name": "",
"domain_id": "",
"entity_types": {
Expand Down Expand Up @@ -444,7 +439,6 @@ def test_query_match(mock_graph, searcher):
"owner": "",
"owner_email": "",
"total_parents": 0,
"parents": [],
"domain_name": "",
"domain_id": "",
"entity_types": {
Expand Down Expand Up @@ -520,7 +514,6 @@ def test_result_with_owner(mock_graph, searcher):
"owner": "Shannon Lovett",
"owner_email": "shannon@longtail.com",
"total_parents": 0,
"parents": [],
"domain_name": "",
"domain_id": "",
"entity_types": {
Expand Down Expand Up @@ -891,7 +884,6 @@ def test_search_for_charts(mock_graph, searcher):
"owner": "",
"owner_email": "",
"total_parents": 0,
"parents": [],
"domain_name": "",
"domain_id": "",
"entity_types": {
Expand Down Expand Up @@ -1091,7 +1083,6 @@ def test_list_database_tables(mock_graph, searcher):
"owner": "",
"owner_email": "",
"total_parents": 0,
"parents": [],
"domain_name": "",
"domain_id": "",
"entity_types": {
Expand Down
14 changes: 11 additions & 3 deletions templates/details_table.html
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,17 @@ <h3 class="govuk-heading-s govuk-!-margin-top-3">
<h2 class="govuk-heading-m">Table schema</h2>
<p class="govuk-body">The schema for this table is not available.</p>
{% endif %}


</div>
{% if has_lineage %}
<h2 class="govuk-heading-m">Lineage</h2>
<div class="govuk-body-m" >
If you are interested to find out what data were used to create this table or if this table is used to create any further tables, you can see that information via the lineage.
</div>
<div class="govuk-body">
<a href="{{lineage_url}}" class="govuk-link">
View lineage in DataHub
</a>
</div>
{% endif %}
</div>

{% endblock content %}
Loading