Skip to content

Commit 6d5365a

Browse files
MatMoore and LavMatt authored
Filter container entities on dc_display_in_catalogue (#509)
* Filter container entities on dc_display_in_catalogue
* Handle edge case where display_name is None via https://ministryofjustice.sentry.io/issues/5529696082/?project=4507181591101440&referrer=github-open-pr-bot

---------

Co-authored-by: Matt <38562764+LavMatt@users.noreply.github.com>
1 parent b2b03a6 commit 6d5365a

File tree

5 files changed

+113
-7
lines changed

5 files changed

+113
-7
lines changed

lib/datahub-client/CHANGELOG.md

+10-4
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
99

1010
## Unreleased
1111

12+
### Changed
13+
14+
- Search results and container entities are now filtered to those that include
15+
a special tag, dc:display_in_catalogue. This allows the frontend to display
16+
only the entities we think users are directly interested in, while still
17+
ingesting intermediate tables (which are still relevant in the context of
18+
lineage, governance, data quality etc.)
19+
- Return lists of objects for `SearchResult.tags` and
20+
`SearchResult.tags_to_display` instead of strings.
21+
1222
### Added
1323

1424
- Return domain metadata for Charts
1525
- Add `glossary_terms` list to `SearchResult`
1626

17-
### Changed
18-
19-
- Return lists of objects for `SearchResult.tags` and `SearchResult.tags_to_display` instead of strings.
20-
2127
## Removed
2228

2329
- Removed all remaining references to Data Products

lib/datahub-client/data_platform_catalogue/client/datahub_client.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -339,7 +339,14 @@ def get_database_details(self, urn: str) -> Database:
339339
)
340340
datasets = []
341341
if response["entities"]["total"] > 0:
342-
datasets: list = response["entities"]["searchResults"]
342+
datasets: list = [
343+
entity
344+
for entity in response["entities"]["searchResults"]
345+
if any(
346+
tag.urn == "urn:li:tag:dc_display_in_catalogue"
347+
for tag in parse_tags(entity["entity"])
348+
)
349+
]
343350

344351
return Database(
345352
urn=urn,

lib/datahub-client/data_platform_catalogue/client/graphql/getContainerDetails.graphql

+4-1
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ query getContainer($urn: String!) {
3030
subTypes {
3131
typeNames
3232
}
33-
entities(input: {start: 0, count: 500}) {
33+
entities(input: { start: 0, count: 500 }) {
3434
total
3535
searchResults {
3636
entity {
@@ -171,6 +171,9 @@ fragment datasetDetails on Dataset {
171171
editableProperties {
172172
description
173173
}
174+
tags {
175+
...globalTagsFields
176+
}
174177
}
175178

176179
fragment entityContainer on Container {

lib/datahub-client/data_platform_catalogue/client/graphql_helpers.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ def parse_owner(entity: dict[str, Any]) -> OwnerRef:
3636
else properties.get("fullName", "")
3737
)
3838
owner_details = OwnerRef(
39-
display_name=display_name,
39+
display_name=display_name or "",
4040
email=properties.get("email", ""),
4141
urn=owners[0].get("urn", ""),
4242
)

lib/datahub-client/tests/client/datahub/test_datahub_client.py

+90
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from unittest.mock import MagicMock, patch
44

55
import pytest
6+
67
from data_platform_catalogue.client.datahub_client import (
78
DataHubCatalogueClient,
89
InvalidDomain,
@@ -500,6 +501,95 @@ def test_get_chart_details(self, datahub_client, base_mock_graph):
500501
external_url="https://data.justice.gov.uk/prisons/public-protection/absconds",
501502
)
502503

504+
def test_get_database_details_filters_entities(
505+
self, datahub_client, base_mock_graph
506+
):
507+
urn = "urn:li:container:foo"
508+
datahub_response = {
509+
"container": {
510+
"urn": "urn:li:container",
511+
"type": "CONTAINER",
512+
"platform": {"name": "platform"},
513+
"parentContainers": {
514+
"count": 0,
515+
},
516+
"entities": {
517+
"total": 2,
518+
"searchResults": [
519+
{
520+
"entity": {
521+
"name": "DatasetToShow",
522+
"properties": {
523+
"name": "DatasetToShow",
524+
"description": "Dataset to show",
525+
},
526+
"tags": {
527+
"tags": [
528+
{
529+
"tag": {
530+
"urn": "urn:li:tag:dc_display_in_catalogue",
531+
"properties": {
532+
"name": "dc:display_in_catalogue",
533+
},
534+
}
535+
}
536+
]
537+
},
538+
}
539+
},
540+
{
541+
"entity": {
542+
"name": "DatasetToHide",
543+
"properties": {
544+
"name": "DatasetToHide",
545+
"description": "Dataset to hide",
546+
},
547+
"tags": {"tags": []},
548+
}
549+
},
550+
],
551+
},
552+
"ownership": None,
553+
"properties": {
554+
"name": "Some database",
555+
"description": "a test description",
556+
"customProperties": [],
557+
"lastModified": {"time": 0},
558+
},
559+
},
560+
"extensions": {},
561+
}
562+
base_mock_graph.execute_graphql = MagicMock(return_value=datahub_response)
563+
564+
with patch(
565+
"data_platform_catalogue.client.datahub_client.DataHubCatalogueClient.check_entity_exists_by_urn"
566+
) as mock_exists:
567+
mock_exists.return_value = True
568+
database = datahub_client.get_database_details(urn)
569+
assert database.tables == [
570+
{
571+
"entity": {
572+
"name": "DatasetToShow",
573+
"properties": {
574+
"description": "Dataset to show",
575+
"name": "DatasetToShow",
576+
},
577+
"tags": {
578+
"tags": [
579+
{
580+
"tag": {
581+
"properties": {
582+
"name": "dc:display_in_catalogue",
583+
},
584+
"urn": "urn:li:tag:dc_display_in_catalogue",
585+
},
586+
},
587+
],
588+
},
589+
}
590+
}
591+
]
592+
503593
def test_upsert_table_and_database(
504594
self,
505595
datahub_client,

0 commit comments

Comments
 (0)