Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Filter container entities on dc_display_in_catalogue #509

Merged
merged 3 commits into from
Jun 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 10 additions & 4 deletions lib/datahub-client/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## Unreleased

### Changed

- Search results and container entities are now filtered to those that include
a special tag, `dc:display_in_catalogue`. This allows the frontend to display
only the entities we think users are directly interested in, while still
ingesting intermediate tables (which remain relevant in the context of
lineage, governance, data quality, etc.).
- Return lists of objects for `SearchResult.tags` and
`SearchResult.tags_to_display` instead of strings.

### Added

- Return domain metadata for Charts
- Add `glossary_terms` list to `SearchResult`

### Changed

- Return lists of objects for `SearchResult.tags` and `SearchResult.tags_to_display` instead of strings.

### Removed

- Removed all remaining references to Data Products
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,14 @@ def get_database_details(self, urn: str) -> Database:
)
datasets = []
if response["entities"]["total"] > 0:
datasets: list = response["entities"]["searchResults"]
datasets: list = [
entity
for entity in response["entities"]["searchResults"]
if any(
tag.urn == "urn:li:tag:dc_display_in_catalogue"
for tag in parse_tags(entity["entity"])
)
]

return Database(
urn=urn,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ query getContainer($urn: String!) {
subTypes {
typeNames
}
entities(input: {start: 0, count: 500}) {
entities(input: { start: 0, count: 500 }) {
total
searchResults {
entity {
Expand Down Expand Up @@ -171,6 +171,9 @@ fragment datasetDetails on Dataset {
editableProperties {
description
}
tags {
...globalTagsFields
}
}

fragment entityContainer on Container {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def parse_owner(entity: dict[str, Any]) -> OwnerRef:
else properties.get("fullName", "")
)
owner_details = OwnerRef(
display_name=display_name,
display_name=display_name or "",
email=properties.get("email", ""),
urn=owners[0].get("urn", ""),
)
Expand Down
90 changes: 90 additions & 0 deletions lib/datahub-client/tests/client/datahub/test_datahub_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from unittest.mock import MagicMock, patch

import pytest

from data_platform_catalogue.client.datahub_client import (
DataHubCatalogueClient,
InvalidDomain,
Expand Down Expand Up @@ -500,6 +501,95 @@ def test_get_chart_details(self, datahub_client, base_mock_graph):
external_url="https://data.justice.gov.uk/prisons/public-protection/absconds",
)

def test_get_database_details_filters_entities(
    self, datahub_client, base_mock_graph
):
    """Check that ``get_database_details`` keeps only display-tagged entities.

    The mocked GraphQL response describes a container with two search
    results: one dataset carrying the ``urn:li:tag:dc_display_in_catalogue``
    tag and one dataset with an empty tag list. The test asserts that
    ``Database.tables`` contains only the tagged dataset, in the raw
    search-result shape.
    """
    container_urn = "urn:li:container:foo"

    # Dataset that carries the display tag.
    tagged_dataset = {
        "entity": {
            "name": "DatasetToShow",
            "properties": {
                "name": "DatasetToShow",
                "description": "Dataset to show",
            },
            "tags": {
                "tags": [
                    {
                        "tag": {
                            "urn": "urn:li:tag:dc_display_in_catalogue",
                            "properties": {
                                "name": "dc:display_in_catalogue",
                            },
                        }
                    }
                ]
            },
        }
    }

    # Dataset with no tags at all.
    untagged_dataset = {
        "entity": {
            "name": "DatasetToHide",
            "properties": {
                "name": "DatasetToHide",
                "description": "Dataset to hide",
            },
            "tags": {"tags": []},
        }
    }

    graphql_payload = {
        "container": {
            "urn": "urn:li:container",
            "type": "CONTAINER",
            "platform": {"name": "platform"},
            "parentContainers": {
                "count": 0,
            },
            "entities": {
                "total": 2,
                "searchResults": [tagged_dataset, untagged_dataset],
            },
            "ownership": None,
            "properties": {
                "name": "Some database",
                "description": "a test description",
                "customProperties": [],
                "lastModified": {"time": 0},
            },
        },
        "extensions": {},
    }
    base_mock_graph.execute_graphql = MagicMock(return_value=graphql_payload)

    # Spelled out independently of the fixture above so the comparison does
    # not rely on the very objects that were handed to the client.
    expected_tables = [
        {
            "entity": {
                "name": "DatasetToShow",
                "properties": {
                    "description": "Dataset to show",
                    "name": "DatasetToShow",
                },
                "tags": {
                    "tags": [
                        {
                            "tag": {
                                "properties": {
                                    "name": "dc:display_in_catalogue",
                                },
                                "urn": "urn:li:tag:dc_display_in_catalogue",
                            },
                        },
                    ],
                },
            }
        }
    ]

    # Make the existence check report True for the duration of the call.
    with patch(
        "data_platform_catalogue.client.datahub_client.DataHubCatalogueClient.check_entity_exists_by_urn",
        return_value=True,
    ):
        database = datahub_client.get_database_details(container_urn)

    assert database.tables == expected_tables

def test_upsert_table_and_database(
self,
datahub_client,
Expand Down
Loading