From 3ed901ce641a11536a5a4edc84ee06885aa672f2 Mon Sep 17 00:00:00 2001 From: LavMatt Date: Fri, 7 Jun 2024 17:40:11 +0100 Subject: [PATCH 01/18] add upstream and downstream lineage to getDatasetDetails graphql query --- .../client/graphql/getDatasetDetails.graphql | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/lib/datahub-client/data_platform_catalogue/client/graphql/getDatasetDetails.graphql b/lib/datahub-client/data_platform_catalogue/client/graphql/getDatasetDetails.graphql index 9f40e2aa..637b0ddd 100644 --- a/lib/datahub-client/data_platform_catalogue/client/graphql/getDatasetDetails.graphql +++ b/lib/datahub-client/data_platform_catalogue/client/graphql/getDatasetDetails.graphql @@ -14,6 +14,34 @@ query getDatasetDetails($urn: String!) { } } } + downstream_lineage_relations: lineage ( + input: {direction: DOWNSTREAM + start:0, + count:10} + ) { + total + relationships{ + type + entity{ + urn + type + } + } + } + upstream_lineage_relations: lineage ( + input: {direction: UPSTREAM + start:0, + count:10} + ) { + total + relationships{ + type + entity{ + urn + type + } + } + } subTypes { typeNames } From 83605ebb9557b9c14f2a8bab6282d9e8e039c967 Mon Sep 17 00:00:00 2001 From: LavMatt Date: Fri, 7 Jun 2024 17:41:04 +0100 Subject: [PATCH 02/18] refactor parse_relations() helper to handle more relations --- .../client/graphql_helpers.py | 45 +++++++++++++++---- 1 file changed, 36 insertions(+), 9 deletions(-) diff --git a/lib/datahub-client/data_platform_catalogue/client/graphql_helpers.py b/lib/datahub-client/data_platform_catalogue/client/graphql_helpers.py index 153f2ec9..53a838be 100644 --- a/lib/datahub-client/data_platform_catalogue/client/graphql_helpers.py +++ b/lib/datahub-client/data_platform_catalogue/client/graphql_helpers.py @@ -231,22 +231,49 @@ def parse_columns(entity: dict[str, Any]) -> list[Column]: return sorted(result, key=lambda c: (0 if c.is_primary_key else 1, c.name)) -def parse_relations( - relationship_type: RelationshipType, relations_dict: dict -) -> dict[RelationshipType, list[EntityRef]]: +def parse_relations(response: dict) -> dict[RelationshipType, list[EntityRef]]: """ parse the relationships results returned from a graphql querys """ # # we may want to do soemthing with total realtion if we are returning child relations # # and need to paginate through relations - 10 relations returned as is # total_relations = relations_dict.get("total", 0) - parent_entities = relations_dict.get("relationships", []) - related_entities = [ + + # any relationship related graphl query results should be returned aginst an + # alias with the `_relations` suffix + relationships_dict = {} + relationships = [] + for key, value in response.items(): + if key.endswith("_relations"): + relationships.extend(value.get("relationships", [])) + + relationships_dict[RelationshipType.PARENT] = [ EntityRef( - urn=i["entity"]["urn"], display_name=i["entity"]["properties"]["name"] + urn=r["entity"]["urn"], display_name=r["entity"]["properties"]["name"] ) - for i in parent_entities + for r in relationships + if r.get("type") == "IsPartOf" + ] + + relationships_dict[RelationshipType.UPSTREAM_LINEAGE] = [ + EntityRef(urn=r["entity"]["urn"], display_name="") + for r in relationships + if r.get("type") == "UpstreamOf" ] - relations_return = {relationship_type: related_entities} - return relations_return + relationships_dict[RelationshipType.DOWNSTREAM_LINEAGE] = [ + EntityRef(urn=r["entity"]["urn"], display_name="") + for r in relationships + if r.get("type") == "DownstreamOf" + ] + + # parent_entities = relations_dict.get("relationships", []) + # related_entities = [ + # EntityRef( + # urn=i["entity"]["urn"], display_name=i["entity"]["properties"]["name"] + # ) + # for i in parent_entities + # ] + + # relations_return = {relationship_type: related_entities} + return relationships_dict From c7460f14cc1aacea13160c0fbdb90ee3b350adcb Mon Sep 17 00:00:00 2001 From: LavMatt Date: Fri, 7 Jun 2024 17:42:44 +0100 Subject: [PATCH 03/18] add upstream and downstream lineage to RelationshipType enum --- lib/datahub-client/data_platform_catalogue/entities.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/datahub-client/data_platform_catalogue/entities.py b/lib/datahub-client/data_platform_catalogue/entities.py index 774c897e..795bb395 100644 --- a/lib/datahub-client/data_platform_catalogue/entities.py +++ b/lib/datahub-client/data_platform_catalogue/entities.py @@ -10,6 +10,8 @@ class RelationshipType(Enum): PARENT = "PARENT" PLATFORM = "PLATFORM" + UPSTREAM_LINEAGE = "UPSTREAM_LINEAGE" + DOWNSTREAM_LINEAGE = "DOWNSTREAM_LINEAGE" class EntityRef(BaseModel): @@ -318,9 +320,7 @@ class Table(Entity): urn: str | None = Field( description="Unique identifier for the entity. Relates to Datahub's urn", - examples=[ - "urn:li:dataset:(urn:li:dataPlatform:redshift,public.table,DEV)" - ], + examples=["urn:li:dataset:(urn:li:dataPlatform:redshift,public.table,DEV)"], ) column_details: list[Column] = Field( description="A list of objects which relate to columns in your data, each list item will contain, a name of" From e5e88d71c486904cdfeea5d2bce3ee690c17cf36 Mon Sep 17 00:00:00 2001 From: LavMatt Date: Fri, 7 Jun 2024 17:44:39 +0100 Subject: [PATCH 04/18] update parse_relations() input args --- .../data_platform_catalogue/client/datahub_client.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/lib/datahub-client/data_platform_catalogue/client/datahub_client.py b/lib/datahub-client/data_platform_catalogue/client/datahub_client.py index e6016b81..4266733e 100644 --- a/lib/datahub-client/data_platform_catalogue/client/datahub_client.py +++ b/lib/datahub-client/data_platform_catalogue/client/datahub_client.py @@ -232,14 +232,8 @@ def get_table_details(self, urn) -> Table: created, modified = parse_created_and_modified(properties) name, display_name, qualified_name = parse_names(response, properties) - # A dataset can't have multiple parents, but if we did - # start to use in that we'd need to change this - if response["container_relations"]["total"] > 0: - relations = parse_relations( - RelationshipType.PARENT, response["container_relations"] - ) - else: - relations = {} + relations = parse_relations(response) + return Table( urn=urn, display_name=display_name, From 1fbf3bd14eb157ad784fb2011713b4feba46fdb8 Mon Sep 17 00:00:00 2001 From: LavMatt Date: Fri, 7 Jun 2024 17:45:56 +0100 Subject: [PATCH 05/18] update parse_relations() input args in search --- lib/datahub-client/data_platform_catalogue/client/search.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/datahub-client/data_platform_catalogue/client/search.py b/lib/datahub-client/data_platform_catalogue/client/search.py index 48e9d719..09190c40 100644 --- a/lib/datahub-client/data_platform_catalogue/client/search.py +++ b/lib/datahub-client/data_platform_catalogue/client/search.py @@ -256,9 +256,8 @@ def _parse_result( last_modified = parse_last_modified(entity) name, display_name, qualified_name = parse_names(entity, properties) - relations = parse_relations( - RelationshipType.PARENT, entity.get("relationships", {}) - ) + # Could maybe do better with this. Should revisit + relations = parse_relations({"key_relations": entity}) domain = parse_domain(entity) metadata = { From d8de3c67b00103bd74fefef22b8528f348bc494a Mon Sep 17 00:00:00 2001 From: LavMatt Date: Fri, 7 Jun 2024 17:47:55 +0100 Subject: [PATCH 06/18] add has_lineage and lineage_url to dataset details context --- home/service/details.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/home/service/details.py b/home/service/details.py index 4525b792..3f111fbe 100644 --- a/home/service/details.py +++ b/home/service/details.py @@ -1,6 +1,8 @@ +import os from data_platform_catalogue.entities import RelationshipType from data_platform_catalogue.search_types import MultiSelectFilter, ResultType from django.core.exceptions import ObjectDoesNotExist +from urllib.parse import urlsplit from .base import GenericService @@ -82,13 +84,33 @@ def __init__(self, urn: str): self.context = self._get_context() def _get_context(self): + split_datahub_url = urlsplit(os.environ["CATALOGUE_URL"]) + return { "table": self.table_metadata, "parent_entity": self.parent_entity, "dataset_parent_type": self.dataset_parent_type, "h1_value": "Details", + "has_lineage": self.get_has_lineage(), + "lineage_url": f"{split_datahub_url.scheme}://{split_datahub_url.netloc}/dataset/{self.table_metadata.urn}/Lineage?is_lineage_mode=true&", } + def get_has_lineage(self) -> bool: + """ + Inspects the relationships property of the Table model to establish if a + Dataset has any lineage recorded in datahub. + """ + has_lineage = ( + True + if len(self.table_metadata.relationships[RelationshipType.UPSTREAM_LINEAGE]) + + len( + self.table_metadata.relationships[RelationshipType.DOWNSTREAM_LINEAGE] + ) + > 0 + else False + ) + return has_lineage + class ChartDetailsService(GenericService): def __init__(self, urn: str): From 219fb473864d33b1f9887ce863601a9a315e8c8a Mon Sep 17 00:00:00 2001 From: LavMatt Date: Fri, 7 Jun 2024 17:48:30 +0100 Subject: [PATCH 07/18] add lineage link to details_table template --- templates/details_table.html | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/templates/details_table.html b/templates/details_table.html index 413619e4..8a36cbe8 100644 --- a/templates/details_table.html +++ b/templates/details_table.html @@ -78,9 +78,21 @@

Table schema

The schema for this table is not available.

{% endif %} - - + {% if has_lineage %} +
+
+

+ URL +

+ +
+
+ {% endif %} {% endblock content %} From e0ce81700a802389b96af6bfcac839d80f695e3b Mon Sep 17 00:00:00 2001 From: LavMatt Date: Mon, 10 Jun 2024 14:46:37 +0100 Subject: [PATCH 08/18] remove redundant block in query for data product relationships --- .../client/graphql/search.graphql | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/lib/datahub-client/data_platform_catalogue/client/graphql/search.graphql b/lib/datahub-client/data_platform_catalogue/client/graphql/search.graphql index cbd17b69..417f53d7 100644 --- a/lib/datahub-client/data_platform_catalogue/client/graphql/search.graphql +++ b/lib/datahub-client/data_platform_catalogue/client/graphql/search.graphql @@ -99,25 +99,6 @@ query Search( subTypes { typeNames } - relationships( - input: { - types: ["DataProductContains"] - direction: INCOMING - count: 10 - } - ) { - total - relationships { - entity { - urn - ... on DataProduct { - properties { - name - } - } - } - } - } ownership { owners { owner { From d3e16f42e11e835183a63fbdc19772ac88d29b26 Mon Sep 17 00:00:00 2001 From: LavMatt Date: Mon, 10 Jun 2024 14:47:41 +0100 Subject: [PATCH 09/18] return entity name for lineage --- .../client/graphql/getDatasetDetails.graphql | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/lib/datahub-client/data_platform_catalogue/client/graphql/getDatasetDetails.graphql b/lib/datahub-client/data_platform_catalogue/client/graphql/getDatasetDetails.graphql index 637b0ddd..02a2dd7a 100644 --- a/lib/datahub-client/data_platform_catalogue/client/graphql/getDatasetDetails.graphql +++ b/lib/datahub-client/data_platform_catalogue/client/graphql/getDatasetDetails.graphql @@ -24,6 +24,12 @@ query getDatasetDetails($urn: String!) { type entity{ urn + ... on Dataset { + name + properties{ + name + } + } type } } @@ -38,6 +44,12 @@ query getDatasetDetails($urn: String!) { type entity{ urn + ... on Dataset { + name + properties{ + name + } + } type } } @@ -45,7 +57,7 @@ query getDatasetDetails($urn: String!) { subTypes { typeNames } - container_relations: relationships( + parent_container_relations: relationships( input: { types: ["IsPartOf"], direction: OUTGOING, count: 10 } ) { total From c643eccf0bd3d30dc0cd4f1f679a408bb14acd1c Mon Sep 17 00:00:00 2001 From: LavMatt Date: Mon, 10 Jun 2024 14:48:32 +0100 Subject: [PATCH 10/18] have only 1 RelationshipType for lineage --- lib/datahub-client/data_platform_catalogue/entities.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/datahub-client/data_platform_catalogue/entities.py b/lib/datahub-client/data_platform_catalogue/entities.py index 795bb395..12a29596 100644 --- a/lib/datahub-client/data_platform_catalogue/entities.py +++ b/lib/datahub-client/data_platform_catalogue/entities.py @@ -10,8 +10,7 @@ class RelationshipType(Enum): PARENT = "PARENT" PLATFORM = "PLATFORM" - UPSTREAM_LINEAGE = "UPSTREAM_LINEAGE" - DOWNSTREAM_LINEAGE = "DOWNSTREAM_LINEAGE" + DATA_LINEAGE = "DATA_LINEAGE" class EntityRef(BaseModel): From b8f25da694831d54251b11b0d70b92f1b1efbce1 Mon Sep 17 00:00:00 2001 From: LavMatt Date: Mon, 10 Jun 2024 14:50:55 +0100 Subject: [PATCH 11/18] simplfy `parse_relations()` helper function --- .../client/datahub_client.py | 14 ++++- .../client/graphql_helpers.py | 59 +++++-------------- .../data_platform_catalogue/client/search.py | 4 -- 3 files changed, 27 insertions(+), 50 deletions(-) diff --git a/lib/datahub-client/data_platform_catalogue/client/datahub_client.py b/lib/datahub-client/data_platform_catalogue/client/datahub_client.py index 4266733e..3f29a3f7 100644 --- a/lib/datahub-client/data_platform_catalogue/client/datahub_client.py +++ b/lib/datahub-client/data_platform_catalogue/client/datahub_client.py @@ -232,15 +232,23 @@ def get_table_details(self, urn) -> Table: created, modified = parse_created_and_modified(properties) name, display_name, qualified_name = parse_names(response, properties) - relations = parse_relations(response) - + lineage_relations = parse_relations( + RelationshipType.DATA_LINEAGE, + [ + response.get("downstream_lineage_relations", {}), + response.get("upstream_lineage_relations", {}), + ], + ) + parent_relations = parse_relations( + RelationshipType.PARENT, [response["parent_container_relations"]] + ) return Table( urn=urn, display_name=display_name, name=name, fully_qualified_name=qualified_name, description=properties.get("description", ""), - relationships=relations, + relationships={**lineage_relations, **parent_relations}, domain=domain, governance=Governance( data_owner=owner, diff --git a/lib/datahub-client/data_platform_catalogue/client/graphql_helpers.py b/lib/datahub-client/data_platform_catalogue/client/graphql_helpers.py index 53a838be..b1c0817e 100644 --- a/lib/datahub-client/data_platform_catalogue/client/graphql_helpers.py +++ b/lib/datahub-client/data_platform_catalogue/client/graphql_helpers.py @@ -231,49 +231,22 @@ def parse_columns(entity: dict[str, Any]) -> list[Column]: return sorted(result, key=lambda c: (0 if c.is_primary_key else 1, c.name)) -def parse_relations(response: dict) -> dict[RelationshipType, list[EntityRef]]: +def parse_relations( + relationship_type: RelationshipType, relations_list: list[dict] +) -> dict[RelationshipType, list[EntityRef]]: """ parse the relationships results returned from a graphql querys """ - # # we may want to do soemthing with total realtion if we are returning child relations - # # and need to paginate through relations - 10 relations returned as is - # total_relations = relations_dict.get("total", 0) - - # any relationship related graphl query results should be returned aginst an - # alias with the `_relations` suffix - relationships_dict = {} - relationships = [] - for key, value in response.items(): - if key.endswith("_relations"): - relationships.extend(value.get("relationships", [])) - - relationships_dict[RelationshipType.PARENT] = [ - EntityRef( - urn=r["entity"]["urn"], display_name=r["entity"]["properties"]["name"] - ) - for r in relationships - if r.get("type") == "IsPartOf" - ] - - relationships_dict[RelationshipType.UPSTREAM_LINEAGE] = [ - EntityRef(urn=r["entity"]["urn"], display_name="") - for r in relationships - if r.get("type") == "UpstreamOf" - ] - - relationships_dict[RelationshipType.DOWNSTREAM_LINEAGE] = [ - EntityRef(urn=r["entity"]["urn"], display_name="") - for r in relationships - if r.get("type") == "DownstreamOf" - ] - - # parent_entities = relations_dict.get("relationships", []) - # related_entities = [ - # EntityRef( - # urn=i["entity"]["urn"], display_name=i["entity"]["properties"]["name"] - # ) - # for i in parent_entities - # ] - - # relations_return = {relationship_type: related_entities} - return relationships_dict + related_entities = [] + for j in relations_list: + for i in j["relationships"]: + urn = i.get("entity").get("urn") + display_name = ( + i.get("entity").get("properties").get("name") + if i.get("entity", {}).get("properties") is not None + else i.get("entity").get("name") + ) + related_entities.append(EntityRef(urn=urn, display_name=display_name)) + + relations_return = {relationship_type: related_entities} + return relations_return diff --git a/lib/datahub-client/data_platform_catalogue/client/search.py b/lib/datahub-client/data_platform_catalogue/client/search.py index 09190c40..da6f69ae 100644 --- a/lib/datahub-client/data_platform_catalogue/client/search.py +++ b/lib/datahub-client/data_platform_catalogue/client/search.py @@ -11,7 +11,6 @@ parse_names, parse_owner, parse_properties, - parse_relations, parse_tags, ) from data_platform_catalogue.entities import RelationshipType @@ -256,15 +255,12 @@ def _parse_result( last_modified = parse_last_modified(entity) name, display_name, qualified_name = parse_names(entity, properties) - # Could maybe do better with this. Should revisit - relations = parse_relations({"key_relations": entity}) domain = parse_domain(entity) metadata = { "owner": owner.display_name, "owner_email": owner.email, "total_parents": entity.get("relationships", {}).get("total", 0), - "parents": relations[RelationshipType.PARENT], "domain_name": domain.display_name, "domain_id": domain.urn, "entity_types": self._parse_types_and_sub_types(entity, "Dataset"), From d8732e61caa2ab3392168a70e1b5270abfd8a435 Mon Sep 17 00:00:00 2001 From: LavMatt Date: Mon, 10 Jun 2024 14:51:52 +0100 Subject: [PATCH 12/18] update DatasetDetails to use single lineage type --- home/service/details.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/home/service/details.py b/home/service/details.py index 3f111fbe..6c316278 100644 --- a/home/service/details.py +++ b/home/service/details.py @@ -102,9 +102,8 @@ def get_has_lineage(self) -> bool: """ has_lineage = ( True - if len(self.table_metadata.relationships[RelationshipType.UPSTREAM_LINEAGE]) - + len( - self.table_metadata.relationships[RelationshipType.DOWNSTREAM_LINEAGE] + if len( + self.table_metadata.relationships.get(RelationshipType.DATA_LINEAGE, []) ) > 0 else False From 440eb51dad43ae8703cd4f7c69f275157b9a9311 Mon Sep 17 00:00:00 2001 From: LavMatt Date: Mon, 10 Jun 2024 14:52:27 +0100 Subject: [PATCH 13/18] align url to rest of table --- templates/details_table.html | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/templates/details_table.html b/templates/details_table.html index 8a36cbe8..16ab2567 100644 --- a/templates/details_table.html +++ b/templates/details_table.html @@ -80,8 +80,7 @@

Table schema

{% endif %} {% if has_lineage %} -
-
+

URL

@@ -91,7 +90,6 @@

-
{% endif %} From b3ab94d79660b945cb0844f3f836fb526518a351 Mon Sep 17 00:00:00 2001 From: LavMatt Date: Mon, 10 Jun 2024 14:53:27 +0100 Subject: [PATCH 14/18] update tests --- .../client/datahub/test_datahub_client.py | 23 +++++++++++++------ .../client/datahub/test_graphql_helpers.py | 4 ++-- .../tests/client/datahub/test_search.py | 9 -------- tests/benchmark/test_exact_matches.py | 4 ++-- tests/conftest.py | 3 ++- 5 files changed, 22 insertions(+), 21 deletions(-) diff --git a/lib/datahub-client/tests/client/datahub/test_datahub_client.py b/lib/datahub-client/tests/client/datahub/test_datahub_client.py index c7c9e797..6827e360 100644 --- a/lib/datahub-client/tests/client/datahub/test_datahub_client.py +++ b/lib/datahub-client/tests/client/datahub/test_datahub_client.py @@ -232,17 +232,22 @@ def test_get_dataset( urn = "abc" datahub_response = { "dataset": { + "type": "DATASET", "platform": {"name": "datahub"}, "ownership": None, "subTypes": None, - "container_relations": { + "downstream_lineage_relations": {"total": 0, "relationships": []}, + "upstream_lineage_relations": {"total": 0, "relationships": []}, + "parent_container_relations": { "total": 1, "relationships": [ { + "type": "IsPartOf", + "direction": "OUTGOING", "entity": { "urn": "urn:li:container:database", "properties": {"name": "database"}, - } + }, } ], }, @@ -315,7 +320,8 @@ def test_get_dataset( relationships={ RelationshipType.PARENT: [ EntityRef(urn="urn:li:container:database", display_name="database") - ] + ], + RelationshipType.DATA_LINEAGE: [], }, domain=DomainRef(display_name="", urn=""), governance=Governance( @@ -359,9 +365,9 @@ def test_get_dataset_minimal_properties( "platform": {"name": "datahub"}, "name": "notinproperties", "properties": {}, - "container_relations": { - "total": 0, - }, + "downstream_lineage_relations": {"total": 0, "relationships": []}, + "upstream_lineage_relations": {"total": 0, "relationships": []}, + "parent_container_relations": {"total": 0, "relationships": []}, "data_product_relations": {"total": 0, "relationships": []}, "schemaMetadata": {"fields": []}, } @@ -380,7 +386,10 @@ def test_get_dataset_minimal_properties( name="notinproperties", fully_qualified_name="notinproperties", description="", - relationships={}, + relationships={ + RelationshipType.PARENT: [], + RelationshipType.DATA_LINEAGE: [], + }, domain=DomainRef(display_name="", urn=""), governance=Governance( data_owner=OwnerRef(display_name="", email="", urn=""), diff --git a/lib/datahub-client/tests/client/datahub/test_graphql_helpers.py b/lib/datahub-client/tests/client/datahub/test_graphql_helpers.py index 5727a268..b379c45c 100644 --- a/lib/datahub-client/tests/client/datahub/test_graphql_helpers.py +++ b/lib/datahub-client/tests/client/datahub/test_graphql_helpers.py @@ -186,7 +186,7 @@ def test_parse_relations(): ], } } - result = parse_relations(RelationshipType.PARENT, relations["relationships"]) + result = parse_relations(RelationshipType.PARENT, [relations["relationships"]]) assert result == { RelationshipType.PARENT: [ EntityRef(urn="urn:li:dataProduct:test", display_name="test") @@ -196,7 +196,7 @@ def test_parse_relations(): def test_parse_relations_blank(): relations = {"relationships": {"total": 0, "relationships": []}} - result = parse_relations(RelationshipType.PARENT, relations["relationships"]) + result = parse_relations(RelationshipType.PARENT, [relations["relationships"]]) assert result == {RelationshipType.PARENT: []} diff --git a/lib/datahub-client/tests/client/datahub/test_search.py b/lib/datahub-client/tests/client/datahub/test_search.py index 908b3b3e..0d879a29 100644 --- a/lib/datahub-client/tests/client/datahub/test_search.py +++ b/lib/datahub-client/tests/client/datahub/test_search.py @@ -121,7 +121,6 @@ def test_one_search_result(mock_graph, searcher): "owner": "", "owner_email": "", "total_parents": 0, - "parents": [], "domain_name": "HMPPS", "domain_id": "urn:li:domain:3dc18e48-c062-4407-84a9-73e23f768023", "entity_types": { @@ -208,7 +207,6 @@ def test_dataset_result(mock_graph, searcher): "owner": "", "owner_email": "", "total_parents": 0, - "parents": [], "domain_name": "HMPPS", "domain_id": "urn:li:domain:3dc18e48-c062-4407-84a9-73e23f768023", "entity_types": { @@ -300,7 +298,6 @@ def test_full_page(mock_graph, searcher): "owner": "", "owner_email": "", "total_parents": 0, - "parents": [], "domain_name": "", "domain_id": "", "entity_types": { @@ -332,7 +329,6 @@ def test_full_page(mock_graph, searcher): "owner": "", "owner_email": "", "total_parents": 0, - "parents": [], "domain_name": "", "domain_id": "", "entity_types": { @@ -362,7 +358,6 @@ def test_full_page(mock_graph, searcher): "owner": "", "owner_email": "", "total_parents": 0, - "parents": [], "domain_name": "", "domain_id": "", "entity_types": { @@ -442,7 +437,6 @@ def test_query_match(mock_graph, searcher): "owner": "", "owner_email": "", "total_parents": 0, - "parents": [], "domain_name": "", "domain_id": "", "entity_types": { @@ -518,7 +512,6 @@ def test_result_with_owner(mock_graph, searcher): "owner": "Shannon Lovett", "owner_email": "shannon@longtail.com", "total_parents": 0, - "parents": [], "domain_name": "", "domain_id": "", "entity_types": { @@ -889,7 +882,6 @@ def test_search_for_charts(mock_graph, searcher): "owner": "", "owner_email": "", "total_parents": 0, - "parents": [], "domain_name": "", "domain_id": "", "entity_types": { @@ -1088,7 +1080,6 @@ def test_list_database_tables(mock_graph, searcher): "owner": "", "owner_email": "", "total_parents": 0, - "parents": [], "domain_name": "", "domain_id": "", "entity_types": { diff --git a/tests/benchmark/test_exact_matches.py b/tests/benchmark/test_exact_matches.py index 503f9426..b11ac841 100644 --- a/tests/benchmark/test_exact_matches.py +++ b/tests/benchmark/test_exact_matches.py @@ -15,8 +15,8 @@ "query,expected_urn", [ ( - "prison_population_history.chunk_assignment", - "urn:li:dataset:(urn:li:dataPlatform:dbt,awsdatacatalog.prison_population_history.chunk_assignment,PROD)", + "bold_common_platform_linked_tables.all_offence", + "urn:li:dataset:(urn:li:dataPlatform:dbt,cadet.awsdatacatalog.bold_common_platform_linked_tables.all_offence,PROD)", ), ( "Accommodation on the first night following release", diff --git a/tests/conftest.py b/tests/conftest.py index b4e6acfb..7ab28ec2 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -98,7 +98,8 @@ def generate_table_metadata( name=name or fake.unique.name(), fully_qualified_name="Foo.Dataset", description=description or fake.paragraph(), - relationships=relations or {RelationshipType.PARENT: []}, + relationships=relations + or {RelationshipType.PARENT: [], RelationshipType.DATA_LINEAGE: []}, domain=DomainRef(display_name="LAA", urn="LAA"), governance=Governance( data_owner=OwnerRef( From e4e9c826ac52baaf41b25472578ad60978981277 Mon Sep 17 00:00:00 2001 From: LavMatt Date: Mon, 10 Jun 2024 15:15:23 +0100 Subject: [PATCH 15/18] add default value for url --- home/service/details.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/home/service/details.py b/home/service/details.py index 6c316278..547bb07d 100644 --- a/home/service/details.py +++ b/home/service/details.py @@ -84,7 +84,9 @@ def __init__(self, urn: str): self.context = self._get_context() def _get_context(self): - split_datahub_url = urlsplit(os.environ["CATALOGUE_URL"]) + split_datahub_url = urlsplit( + os.getenv("CATALOGUE_URL", "https://test-catalogue.gov.uk") + ) return { "table": self.table_metadata, From d1f6caf150493348de97c5e66b106f1cea0dfd08 Mon Sep 17 00:00:00 2001 From: LavMatt Date: Mon, 10 Jun 2024 16:59:06 +0100 Subject: [PATCH 16/18] design suggestions for lineage label, from Alex and Jess --- templates/details_table.html | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/templates/details_table.html b/templates/details_table.html index 16ab2567..8f5aac4a 100644 --- a/templates/details_table.html +++ b/templates/details_table.html @@ -78,19 +78,17 @@

Table schema

The schema for this table is not available.

{% endif %} - - {% if has_lineage %} -
-

- URL -

- -
- {% endif %} + {% if has_lineage %} +

Lineage

+
+ If you are interested to find out what tables where used to create this table or if this table is used to create any further tables, you can see that information in the table lineage. +
+ + {% endif %} {% endblock content %} From f08a046593909ca52f2f4faa4fb3559adbc07884 Mon Sep 17 00:00:00 2001 From: LavMatt Date: Mon, 10 Jun 2024 17:02:54 +0100 Subject: [PATCH 17/18] spell it right --- templates/details_table.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/details_table.html b/templates/details_table.html index 8f5aac4a..bfcd5c62 100644 --- a/templates/details_table.html +++ b/templates/details_table.html @@ -81,7 +81,7 @@

Table schema

{% if has_lineage %}

Lineage

- If you are interested to find out what tables where used to create this table or if this table is used to create any further tables, you can see that information in the table lineage. + If you are interested to find out what data were used to create this table or if this table is used to create any further tables, you can see that information via the lineage.