Skip to content

Commit 39ebaf6

Browse files
MatMoore authored and mitchdawson1982 committed
Populate domains drop down with what's been ingested in datahub (#407)
* Add missing domain information from charts

* Update search tests that hit datahub dev
  - remove entity which is not currently present
  - enable the no_duplicates test (we have fixed this)

* Load the list of domains from Datahub

  Previously we hardcoded the list of domains shown in the search filter, and had different lists per environment. This was useful in alpha when we had some junk domains we wanted to filter out, but now we're at a point where every domain in Datahub should be one we want to use.

  This commit means we now fetch every domain that has something linked to it, and display that in alphabetical order.

* Move domain model to models and remove unused model

* Refactor: decouple SearchFacetFetcher from DomainModel

* Cache facets fetched from datahub

  Ideally we would just fetch the facets once per request, but in practice we do this from a few different places.

  1. In the view we instantiate a SearchService, which uses the domain model in constructing filters for Datahub.
  2. The SearchForm also needs them to know what choices are valid, so we need to pass a callback to the form's ChoiceField. That callback does not share any data with the view.

  Caching the value is a quick way to avoid making extra requests for the same data.

* Hide subdomains if there aren't any defined

  This is the case at the moment, because the domain model we've pulled in from CaDeT doesn't have subdomains. This might change later though so I don't want to remove the subdomain code completely.

* Include missing domains

  Previously it was only returning domains with tables in. We should include any that show as non-empty in Find MOJ Data.
1 parent 709dadb commit 39ebaf6

20 files changed

+203
-206
lines changed

core/settings.py

+6
Original file line number | Diff line number | Diff line change
@@ -181,6 +181,12 @@
181181
},
182182
}
183183

184+
CACHES = {
185+
"default": {
186+
"BACKEND": "django.core.cache.backends.locmem.LocMemCache",
187+
}
188+
}
189+
184190
ANALYTICS_ID: str = os.environ.get("ANALYTICS_ID", "")
185191
ENABLE_ANALYTICS: bool = (
186192
os.environ.get("ENABLE_ANALYTICS") in TRUTHY_VALUES

home/forms/domain_model.py

-135
This file was deleted.

home/forms/search.py

+10-5
Original file line number | Diff line number | Diff line change
@@ -4,21 +4,24 @@
44
from data_platform_catalogue.search_types import ResultType
55
from django import forms
66

7-
from .domain_model import Domain, DomainModel
7+
from ..models.domain_model import Domain, DomainModel
8+
from ..service.search_facet_fetcher import SearchFacetFetcher
89

910

1011
def get_domain_choices() -> list[Domain]:
1112
"""Make API call to obtain domain choices"""
1213
choices = [
1314
Domain("", "All domains"),
1415
]
15-
choices.extend(DomainModel().top_level_domains)
16+
facets = SearchFacetFetcher().fetch()
17+
choices.extend(DomainModel(facets).top_level_domains)
1618
return choices
1719

1820

1921
def get_subdomain_choices() -> list[Domain]:
2022
choices = [Domain("", "All subdomains")]
21-
choices.extend(DomainModel().all_subdomains())
23+
facets = SearchFacetFetcher().fetch()
24+
choices.extend(DomainModel(facets).all_subdomains())
2225
return choices
2326

2427

@@ -47,8 +50,7 @@ def get_entity_types():
4750
class SelectWithOptionAttribute(forms.Select):
4851
def __init__(self, *args, **kwargs):
4952
super().__init__(*args, **kwargs)
50-
51-
self.domain_model = DomainModel()
53+
self.domain_model = None
5254

5355
def create_option(
5456
self, name, urn, label, selected, index, subindex=None, attrs=None
@@ -57,6 +59,9 @@ def create_option(
5759
name, urn, label, selected, index, subindex, attrs
5860
)
5961

62+
facets = SearchFacetFetcher().fetch()
63+
self.domain_model = self.domain_model or DomainModel(facets)
64+
6065
if urn:
6166
option["attrs"]["data-parent"] = self.domain_model.get_parent_urn(urn)
6267

home/helper.py

+6-1
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,6 @@
1+
from data_platform_catalogue.search_types import ResultType
2+
3+
14
def filter_seleted_domains(domain_list, domains):
25
selected_domain = {}
36
for domain in domain_list:
@@ -7,6 +10,8 @@ def filter_seleted_domains(domain_list, domains):
710

811

912
def get_domain_list(client):
10-
facets = client.search_facets()
13+
facets = client.search_facets(
14+
results_types=[ResultType.TABLE, ResultType.CHART, ResultType.DATABASE]
15+
)
1116
domain_list = facets.options("domain")
1217
return domain_list
+16
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,16 @@
1+
# Generated by Django 5.0.6 on 2024-06-10 09:39
2+
3+
from django.db import migrations
4+
5+
6+
class Migration(migrations.Migration):
7+
8+
dependencies = [
9+
("home", "0001_initial"),
10+
]
11+
12+
operations = [
13+
migrations.DeleteModel(
14+
name="Catalogue",
15+
),
16+
]

home/models.py

-8
This file was deleted.

home/models/__init__.py

Whitespace-only changes.

home/models/domain_model.py

+51
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,51 @@
1+
import logging
2+
from typing import NamedTuple
3+
4+
from data_platform_catalogue.search_types import SearchFacets
5+
6+
logger = logging.getLogger(__name__)
7+
8+
9+
class Domain(NamedTuple):
10+
urn: str
11+
label: str
12+
13+
14+
class DomainModel:
15+
"""
16+
Store information about domains and subdomains
17+
"""
18+
19+
def __init__(self, search_facets: SearchFacets):
20+
self.labels = {}
21+
22+
self.top_level_domains = [
23+
Domain(option.value, option.label)
24+
for option in search_facets.options("domains")
25+
]
26+
self.top_level_domains.sort(key=lambda d: d.label)
27+
28+
logger.info(f"{self.top_level_domains=}")
29+
30+
self.subdomains = {}
31+
32+
for urn, label in self.top_level_domains:
33+
self.labels[urn] = label
34+
35+
def all_subdomains(self) -> list[Domain]: # -> list[Any]
36+
"""
37+
A flat list of all subdomains
38+
"""
39+
subdomains = []
40+
for domain_choices in self.subdomains.values():
41+
subdomains.extend(domain_choices)
42+
return subdomains
43+
44+
def get_parent_urn(self, child_subdomain_urn) -> str | None:
45+
for domain, subdomains in self.subdomains.items():
46+
for subdomain in subdomains:
47+
if child_subdomain_urn == subdomain.urn:
48+
return domain
49+
50+
def get_label(self, urn):
51+
return self.labels.get(urn, urn)

home/service/search.py

+11-5
Original file line number | Diff line number | Diff line change
@@ -12,13 +12,16 @@
1212
from django.core.paginator import Paginator
1313
from nltk.stem import PorterStemmer
1414

15-
from home.forms.domain_model import DomainModel
1615
from home.forms.search import SearchForm
16+
from home.models.domain_model import DomainModel
1717

1818
from .base import GenericService
19+
from .search_facet_fetcher import SearchFacetFetcher
1920

2021

21-
def domains_with_their_subdomains(domain: str, subdomain: str) -> list[str]:
22+
def domains_with_their_subdomains(
23+
domain: str, subdomain: str, domain_model: DomainModel
24+
) -> list[str]:
2225
"""
2326
Users can search by domain, and optionally by subdomain.
2427
When subdomain is passed, then we can filter on that directly.
@@ -30,14 +33,15 @@ def domains_with_their_subdomains(domain: str, subdomain: str) -> list[str]:
3033
if subdomain:
3134
return [subdomain]
3235

33-
subdomains = DomainModel().subdomains.get(domain, [])
36+
subdomains = domain_model.subdomains.get(domain, [])
3437
subdomains = [subdomain[0] for subdomain in subdomains]
3538
return [domain, *subdomains] if not domain == "" else []
3639

3740

3841
class SearchService(GenericService):
3942
def __init__(self, form: SearchForm, page: str, items_per_page: int = 20):
40-
self.domain_model = DomainModel()
43+
facets = SearchFacetFetcher().fetch()
44+
self.domain_model = DomainModel(facets)
4145
self.stemmer = PorterStemmer()
4246
self.form = form
4347
if self.form.is_bound:
@@ -76,7 +80,9 @@ def _get_search_results(self, page: str, items_per_page: int) -> SearchResponse:
7680
sort = form_data.get("sort", "relevance")
7781
domain = form_data.get("domain", "")
7882
subdomain = form_data.get("subdomain", "")
79-
domains_and_subdomains = domains_with_their_subdomains(domain, subdomain)
83+
domains_and_subdomains = domains_with_their_subdomains(
84+
domain, subdomain, self.domain_model
85+
)
8086
where_to_access = self._build_custom_property_filter(
8187
"whereToAccessDataset=", form_data.get("where_to_access", [])
8288
)

home/service/search_facet_fetcher.py

+24
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,24 @@
1+
from data_platform_catalogue.search_types import SearchFacets
2+
from django.core.cache import cache
3+
4+
from .base import GenericService
5+
6+
7+
class SearchFacetFetcher(GenericService):
8+
def __init__(self):
9+
self.client = self._get_catalogue_client()
10+
self.cache_key = "search_facets"
11+
self.cache_timeout_seconds = 5
12+
13+
def fetch(self) -> SearchFacets:
14+
"""
15+
Fetch a static list of options that is independent of the search query
16+
and any applied filters. Values are cached for 5 seconds to avoid
17+
unnecessary queries.
18+
"""
19+
result = cache.get(self.cache_key)
20+
if not result:
21+
result = self.client.search_facets()
22+
cache.set(self.cache_key, result, timeout=self.cache_timeout_seconds)
23+
24+
return result

lib/datahub-client/CHANGELOG.md

+4
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,10 @@ All notable changes to this project will be documented in this file.
77
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
88
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
99

10+
## Unreleased
11+
12+
- Return domain metadata for Charts
13+
1014
## [1.0.1] 2024-05-07
1115

1216
Change of build repo and several bug fixes following the refactor.

lib/datahub-client/data_platform_catalogue/client/graphql/getChartDetails.graphql

+10
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,16 @@ query getChartDetails($urn: String!) {
55
platform {
66
name
77
}
8+
domain {
9+
domain {
10+
urn
11+
id
12+
properties {
13+
name
14+
description
15+
}
16+
}
17+
}
818
ownership {
919
owners {
1020
owner {

0 commit comments

Comments
 (0)