From ea4b1b1e456ed123328e32a9d585cbaec6dcd1a1 Mon Sep 17 00:00:00 2001 From: Guillaume Viger Date: Mon, 20 Nov 2023 09:25:49 -0500 Subject: [PATCH] facets: provide CombinedTermsFacet to fix #798 [+] This approach doesn't use 'nested' fields, but instead relies on a field containing combined "<parent><split char><child>" terms to aggregate correctly. --- .gitignore | 3 + .../services/records/facets/__init__.py | 4 +- .../services/records/facets/facets.py | 228 ++++++++++++++++-- .../services/records/facets/response.py | 9 +- tests/mock_module/config.py | 12 +- .../mappings/os-v1/records/record-v1.0.0.json | 18 +- .../mappings/os-v2/records/record-v1.0.0.json | 18 +- .../mappings/v7/records/record-v1.0.0.json | 18 +- tests/mock_module/schemas.py | 12 +- tests/resources/test_resource_faceting.py | 134 +++++++++- tests/services/test_service_facets.py | 184 ++++++++------ 11 files changed, 533 insertions(+), 107 deletions(-) diff --git a/.gitignore b/.gitignore index e110aa3c..115b6df7 100644 --- a/.gitignore +++ b/.gitignore @@ -129,3 +129,6 @@ dmypy.json # Pyre type checker .pyre/ .DS_Store + +# VSCode editor +.vscode/ diff --git a/invenio_records_resources/services/records/facets/__init__.py b/invenio_records_resources/services/records/facets/__init__.py index 55f41322..ba5ab798 100644 --- a/invenio_records_resources/services/records/facets/__init__.py +++ b/invenio_records_resources/services/records/facets/__init__.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2021 CERN. +# Copyright (C) 2023 Northwestern University. 
# # Invenio-Records-Resources is free software; you can redistribute it and/or # modify it under the terms of the MIT License; see LICENSE file for more @@ -8,7 +9,7 @@ """Facets.""" -from .facets import CFTermsFacet, NestedTermsFacet, TermsFacet +from .facets import CFTermsFacet, CombinedTermsFacet, NestedTermsFacet, TermsFacet from .labels import RecordRelationLabels from .response import FacetsResponse @@ -17,5 +18,6 @@ "FacetsResponse", "NestedTermsFacet", "RecordRelationLabels", + "CombinedTermsFacet", "TermsFacet", ) diff --git a/invenio_records_resources/services/records/facets/facets.py b/invenio_records_resources/services/records/facets/facets.py index 26127663..b4bb3ccc 100644 --- a/invenio_records_resources/services/records/facets/facets.py +++ b/invenio_records_resources/services/records/facets/facets.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2021 CERN. +# Copyright (C) 2023 Northwestern University. # # Invenio-Records-Resources is free software; you can redistribute it and/or # modify it under the terms of the MIT License; see LICENSE file for more @@ -8,6 +9,8 @@ """Facets types defined.""" +from functools import reduce + from invenio_search.engine import dsl @@ -103,15 +106,7 @@ class NestedTermsFacet(TermsFacet): splitchar='::', label=_('Resource types'), value_labels=VocabularyL10NLabels(current_service) - ), - - 'resource_type': NestedTermsFacet( - field='metadata.resource_type.type', - subfield='metadata.resource_type.subtype', - splitchar='::', - label=_('Resource types'), - value_labels=VocabularyL10NLabels(current_service) - ), + ) } """ @@ -149,7 +144,7 @@ def _parse_values(self, filter_values): .. code-block:: python { - 'publication': ['publication::book', 'publication::journal'], + 'publication': ['book', 'journal'], 'dataset': [] } @@ -178,12 +173,10 @@ def get_value_filter(self, parsed_value): # Expects to get a value from the output of "_parse_values()"." 
field_value, subfield_values = parsed_value + q = dsl.Q("term", **{self._field: field_value}) if subfield_values: - return dsl.Q("term", **{self._field: field_value}) & dsl.Q( - "terms", **{self._subfield: subfield_values} - ) - else: - return dsl.Q("term", **{self._field: field_value}) + q &= dsl.Q("terms", **{self._subfield: subfield_values}) + return q def add_filter(self, filter_values): """Construct a filter query for the facet.""" @@ -246,6 +239,211 @@ def get_labelled_values( return ret_val +class CombinedTermsFacet(NestedTermsFacet): + """ + Facet to mimic a nested aggregation without having to define a 'nested' field. + + This facet is needed to prevent the "crossed wires" problem of a regular + NestedTermsFacet applied to documents with multiple 2-level objects. For example, + and the motivating use case for this facet, a "subjects" field with the + following mapping: + + .. code-block:: json + + "subjects": { + "type": "object", + "properties": { + "scheme": { + "type": "keyword" + }, + "subject": { + "type": "keyword" + } + } + } + + will lead the document with the following subjects field: + + .. code-block:: json + + "subjects": [ + {"scheme": "SC1", "subject": "SU1"}, + {"scheme": "SC2", "subject": "SU2"} + ] + + to be internally-indexed in the following manner: + + .. code-block:: json + + "subjects.scheme": ["SC1", "SC2"] + "subjects.subject": ["SU1", "SU2"] + + This indexing loses the original pairwise relationships. This causes searches + and aggregations for scheme = SC1 and subject = SU2 to surface the above document + when they shouldn't. This is the "crossed wires" problem that this Facet class + resolves for aggregations without using "nested" types and searches (the classic + solution to this problem). + + This facet requires the following indexed format: + + .. code-block:: json + + "<field>": ["<parent>", ...] 
+ // may have independent "<child>" entries + "<combined field>": ["<parent><split char><child>", ..., "<child>"] + + The reasoning given for avoiding "nested" fields is to allow regular queries on + those fields that would have had to be made "nested" (only nested queries can be + done on those fields). This is a UX concern since end-users can make queries to + metadata fields directly and they wouldn't be able to anymore (without a lot more + changes). + + Although this facet allows us to forgo the need for a "nested" type field and + nested queries to filter on that field, it *does* do extra work that is thrown away. + See `get_aggregation` and `get_labelled_values`. + + This facet formats the result of the aggregation such that it looks like it was + a nested aggregation. + """ + + def __init__(self, field, combined_field, parents, splitchar="::", **kwargs): + """Constructor. + + :param field: top-level/parent field + :type field: str + :param combined_field: field containing combined terms + :type combined_field: str + :param parents: iterable of parent/top-level values, or a callable returning one + :type parents: Iterable[str] or Callable[[], Iterable[str]] + :param splitchar: splitting/combining token, defaults to "::" + :type splitchar: str, optional + """ + self._field = field + self._combined_field = combined_field + self._parents = parents + self._cached_parents = None + self._splitchar = splitchar + TermsFacet.__init__(self, **kwargs) + + def get_parents(self): + """Return parents. + + We have to delay getting the parents since it may require an application + context. + """ + if not self._cached_parents: + if callable(self._parents): + self._cached_parents = self._parents() + else: + self._cached_parents = self._parents + return self._cached_parents + + def get_aggregation(self): + """Aggregate. + + This aggregation repeats ALL group subaggregations for each bucket generated + by the top-level terms aggregation. 
This is to overcome the + "irrelevant flooding" problem: when aggregating on a subfield, the top 10 + (by default) most frequent terms of that subfield are selected, but those + terms may not be relevant to the parent because the parent-child relationship + is lost when not using "nested". So to make sure only relevant terms are + used to select the documents in the aggregation, we "include" (filter) for them. + + Only the subaggregation corresponding to the top-level group will be kept in + get_labelled_values. + """ + return dsl.A( + { + "terms": { + "field": self._field, + "aggs": { + f"inner_{parent}": { + "terms": { + "field": self._combined_field, + "include": f"{parent}{self._splitchar}.*", + }, + } + for parent in self.get_parents() + }, + } + } + ) + + def get_labelled_values(self, data, filter_values): + """Get a labelled version of a bucket. + + :param data: Bucket data returned by document engine for a field + :type data: dsl.response.aggs.FieldBucketData + """ + + def get_child_buckets(bucket, key): + """Get lower-level/child buckets.""" + result = [] + + # Ignore other subaggregations, and only retrieve inner_{key} one. + # inner_{key} should always be present unless disconnect between + # parents passed to generate subaggregations and parents actually present. + # To not break in that case, we put a default empty list value. + inner_data = getattr(bucket, f"inner_{key}", dsl.AttrDict({"buckets": []})) + + for inner_bucket in inner_data.buckets: + # get raw key and appropriately formatted key + key_raw_inner = self.get_value(inner_bucket) + prefix = key + self._splitchar + key_inner = key_raw_inner[len(prefix):] # fmt: skip + + result.append( + { + "key": key_inner, + "doc_count": self.get_metric(inner_bucket), + "label": key_inner, + "is_selected": self.is_filtered(key_raw_inner, filter_values), + } + ) + + return result + + def get_parent_buckets(data): + """Get top-level/group buckets. 
+ + :param data: Bucket data returned by document engine for a field + :type data: dsl.response.aggs.FieldBucketData + :return: list of labelled buckets + :rtype: List[dict] + """ + label_map = self.get_label_mapping(data.buckets) + result = [] + for bucket in data.buckets: + key = self.get_value(bucket) + result.append( + { + "key": key, + "doc_count": self.get_metric(bucket), + "label": label_map[key], + "is_selected": self.is_filtered(key, filter_values), + "inner": {"buckets": get_child_buckets(bucket, key)}, + } + ) + return result + + return {"buckets": get_parent_buckets(data), "label": str(self._label)} + + def get_value_filter(self, parsed_value): + """Return a filter for a single parsed value.""" + # Expect to get a value from the output of `_parse_values()` + field_value, subfield_values = parsed_value + + # recombine + subfield_values = [ + f"{field_value}{self._splitchar}{subvalue}" for subvalue in subfield_values + ] + + q = dsl.Q("term", **{self._field: field_value}) + if subfield_values: + q &= dsl.Q("terms", **{self._combined_field: subfield_values}) + return q + + class CFFacetMixin: """Mixin to abstract the custom fields path.""" diff --git a/invenio_records_resources/services/records/facets/response.py b/invenio_records_resources/services/records/facets/response.py index 07da05a1..ddb75580 100644 --- a/invenio_records_resources/services/records/facets/response.py +++ b/invenio_records_resources/services/records/facets/response.py @@ -47,9 +47,12 @@ class FacetsResponseForRequest(cls): def _iter_facets(self): # _facets_param instance is added to _search by the FacetsParam.apply for name, facet in self._facets_param.facets.items(): - yield name, facet, getattr( - self.aggregations, name - ), self._facets_param.selected_values.get(name, []) + yield ( + name, + facet, + getattr(self.aggregations, name), + self._facets_param.selected_values.get(name, []), + ) @property def facets(self): diff --git a/tests/mock_module/config.py 
b/tests/mock_module/config.py index 1d90173e..98c0cb30 100644 --- a/tests/mock_module/config.py +++ b/tests/mock_module/config.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2020-2021 CERN. -# Copyright (C) 2020-2021 Northwestern University. +# Copyright (C) 2020-2023 Northwestern University. # # Invenio-Records-Resources is free software; you can redistribute it and/or # modify it under the terms of the MIT License; see LICENSE file for more @@ -18,8 +18,8 @@ from invenio_records_resources.services.records.components import FilesComponent from invenio_records_resources.services.records.config import SearchOptions from invenio_records_resources.services.records.facets import ( + CombinedTermsFacet, NestedTermsFacet, - TermsFacet, ) from invenio_records_resources.services.records.links import ( RecordLink, @@ -44,9 +44,11 @@ class MockSearchOptions(SearchOptions): splitchar="**", label="Type", ), - "subject": TermsFacet( - field="metadata.subject", - label="Subject", + "subjects": CombinedTermsFacet( + field="metadata.subjects.scheme", + combined_field="metadata.combined_subjects", + parents=["SC1", "SC2"], + label="Subjects", ), } diff --git a/tests/mock_module/mappings/os-v1/records/record-v1.0.0.json b/tests/mock_module/mappings/os-v1/records/record-v1.0.0.json index 9ab10ced..cff83f4d 100644 --- a/tests/mock_module/mappings/os-v1/records/record-v1.0.0.json +++ b/tests/mock_module/mappings/os-v1/records/record-v1.0.0.json @@ -21,7 +21,23 @@ } } }, - "subject": { + "subjects": { + "type": "object", + "properties": { + "subject": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "scheme": { + "type": "keyword" + } + } + }, + "combined_subjects": { "type": "keyword" }, "inner_record": { diff --git a/tests/mock_module/mappings/os-v2/records/record-v1.0.0.json b/tests/mock_module/mappings/os-v2/records/record-v1.0.0.json index 9ab10ced..cff83f4d 100644 --- a/tests/mock_module/mappings/os-v2/records/record-v1.0.0.json +++ 
b/tests/mock_module/mappings/os-v2/records/record-v1.0.0.json @@ -21,7 +21,23 @@ } } }, - "subject": { + "subjects": { + "type": "object", + "properties": { + "subject": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "scheme": { + "type": "keyword" + } + } + }, + "combined_subjects": { "type": "keyword" }, "inner_record": { diff --git a/tests/mock_module/mappings/v7/records/record-v1.0.0.json b/tests/mock_module/mappings/v7/records/record-v1.0.0.json index 9ab10ced..cff83f4d 100644 --- a/tests/mock_module/mappings/v7/records/record-v1.0.0.json +++ b/tests/mock_module/mappings/v7/records/record-v1.0.0.json @@ -21,7 +21,23 @@ } } }, - "subject": { + "subjects": { + "type": "object", + "properties": { + "subject": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "scheme": { + "type": "keyword" + } + } + }, + "combined_subjects": { "type": "keyword" }, "inner_record": { diff --git a/tests/mock_module/schemas.py b/tests/mock_module/schemas.py index 4eaafd8e..4b34e61d 100644 --- a/tests/mock_module/schemas.py +++ b/tests/mock_module/schemas.py @@ -2,7 +2,7 @@ # # This file is part of Invenio. # Copyright (C) 2021 CERN. -# Copyright (C) 2021 Northwestern University. +# Copyright (C) 2021-2023 Northwestern University. 
# # Invenio-Records-Resources is free software; you can redistribute it and/or # modify it under the terms of the MIT License; see LICENSE file for more @@ -24,6 +24,13 @@ class TypeSchema(Schema): subtype = fields.Str() +class SubjectSchema(Schema): + """Nested subject schema used for faceting tests.""" + + scheme = fields.Str() + subject = fields.Str() + + class ReferencedCreatedBySchema(Schema): """Nested type schema for fake created by field.""" @@ -48,7 +55,8 @@ class MetadataSchema(Schema): title = fields.Str(required=True, validate=validate.Length(min=3)) type = fields.Nested(TypeSchema) - subject = fields.Str() + subjects = fields.List(fields.Nested(SubjectSchema)) + combined_subjects = fields.List(fields.Str) inner_record = fields.Dict() # referenced records referenced_created_by = fields.Nested(ReferencedCreatedBySchema) diff --git a/tests/resources/test_resource_faceting.py b/tests/resources/test_resource_faceting.py index 2be4e2a0..787feb8d 100644 --- a/tests/resources/test_resource_faceting.py +++ b/tests/resources/test_resource_faceting.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2020 CERN. -# Copyright (C) 2020 Northwestern University. +# Copyright (C) 2020-2023 Northwestern University. # # Invenio-Records-Resources is free software; you can redistribute it and/or # modify it under the terms of the MIT License; see LICENSE file for more @@ -21,8 +21,8 @@ # 2- links are generated -@pytest.fixture(scope="module") -def three_indexed_records(app, identity_simple, search): +@pytest.fixture() +def three_indexed_records(app, identity_simple, search_clear): # NOTE: search is used (and not search_clear) here because all tests # assume 3 records have been indexed and NO tests in this module # adds/deletes any. 
@@ -51,9 +51,9 @@ def test_aggregating(client, headers, three_indexed_records): response_aggs = response.json["aggregations"] expected_aggs = { - "subject": { + "subjects": { "buckets": [], - "label": "Subject", + "label": "Subjects", }, "type": { "label": "Type", @@ -101,9 +101,9 @@ def test_post_filtering(client, headers, three_indexed_records): # Test aggregation is the same response_aggs = response.json["aggregations"] expected_aggs = { - "subject": { + "subjects": { "buckets": [], - "label": "Subject", + "label": "Subjects", }, "type": { "label": "Type", @@ -150,6 +150,126 @@ def test_post_filtering(client, headers, three_indexed_records): ) +def test_nested_post_filtering(client, headers, identity_simple, service, search_clear): + data = { + "metadata": { + "title": "SU1 + (SC1, SU2)", + "subjects": [ + {"subject": "SU1"}, # should be ignored in aggregation results + {"scheme": "SC1", "subject": "SU2"}, + ], + "combined_subjects": ["SC1::SU2"], + }, + } + service.create(identity_simple, data) + data = { + "metadata": { + "title": "(SC1, SU4) + (SC2, SU2)", + "subjects": [ + {"scheme": "SC1", "subject": "SU4"}, + {"scheme": "SC2", "subject": "SU2"}, + ], + "combined_subjects": ["SC1::SU4", "SC2::SU2"], + }, + } + service.create(identity_simple, data) + data = { + "metadata": { + "title": "(SC2, SU3)", + "subjects": [ + {"scheme": "SC2", "subject": "SU3"}, + ], + "combined_subjects": ["SC2::SU3"], + }, + } + service.create(identity_simple, data) + Record.index.refresh() + + # First scenario + # Test that: + # - different subjects/subfields but same scheme result in a union post_filter + # (a record that only has 1 of the scheme+subject should be selected) + # - hierarchical dependency is enforced (selects for scheme AND subject together) + response = client.get("/mocks?subjects=SC1::SU2&subjects=SC1::SU3", headers=headers) + resource_aggs = response.json["aggregations"] + + expected_aggs = { + "subjects": { + "buckets": [ + { + "doc_count": 2, + "inner": { + 
"buckets": [ + { + "doc_count": 1, + "is_selected": True, + "key": "SU2", + "label": "SU2", + }, + { + "doc_count": 1, + "is_selected": False, + "key": "SU4", + "label": "SU4", + }, + ] + }, + "is_selected": False, # only selected if SC1 is separately passed + "key": "SC1", + "label": "SC1", + }, + { + "doc_count": 2, + "inner": { + "buckets": [ + { + "doc_count": 1, + "is_selected": False, + "key": "SU2", + "label": "SU2", + }, + { + "doc_count": 1, + "is_selected": False, + "key": "SU3", + "label": "SU3", + }, + ] + }, + "is_selected": False, + "key": "SC2", + "label": "SC2", + }, + ], + "label": "Subjects", + }, + "type": { + "buckets": [], + "label": "Type", + }, + } + assert expected_aggs == resource_aggs + hits = response.json["hits"]["hits"] + assert 1 == len(hits) + assert "SU1 + (SC1, SU2)" == hits[0]["metadata"]["title"] + + # 2nd scenario + # Test that: + # - different schemes/fields result in a union post_filter as well + # (a record that only has 1 of the scheme+subject should be selected) + response = client.get("/mocks?subjects=SC1::SU1&subjects=SC2::SU3", headers=headers) + resource_aggs = response.json["aggregations"] + + # Reformat expected_aggs + inner = expected_aggs["subjects"]["buckets"][0]["inner"] + inner["buckets"][0]["is_selected"] = False + expected_aggs["subjects"]["buckets"][1]["inner"]["buckets"][1]["is_selected"] = True + assert expected_aggs == resource_aggs + hits = response.json["hits"]["hits"] + assert 1 == len(hits) + assert "(SC2, SU3)" == hits[0]["metadata"]["title"] + + # # 2- links are generated # diff --git a/tests/services/test_service_facets.py b/tests/services/test_service_facets.py index c1a96e1a..4e17147a 100644 --- a/tests/services/test_service_facets.py +++ b/tests/services/test_service_facets.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # # Copyright (C) 2020 CERN. -# Copyright (C) 2020 Northwestern University. +# Copyright (C) 2020-2023 Northwestern University. # Copyright (C) 2022 Graz University of Technology. 
# # Invenio-Records-Resources is free software; you can redistribute it and/or @@ -17,8 +17,8 @@ # # Fixtures # -@pytest.fixture(scope="module") -def records(app, service, identity_simple): +@pytest.fixture() +def records(app, service, identity_simple, search_clear): """Input data (as coming from the view layer).""" items = [] for idx in range(2): @@ -26,7 +26,6 @@ def records(app, service, identity_simple): "metadata": { "title": f"00{idx}", "type": {"type": f"Foo{idx}", "subtype": f"Bar{idx}"}, - "subject": f"Subject{idx}", }, } items.append(service.create(identity_simple, data)) @@ -38,28 +37,14 @@ def records(app, service, identity_simple): # Tests # def test_facets(app, service, identity_simple, records): - """Create a record.""" # Search it res = service.search(identity_simple) service_aggs = res.aggregations expected_aggs = { - "subject": { - "buckets": [ - { - "doc_count": 1, - "is_selected": False, - "key": "Subject0", - "label": "Subject0", - }, - { - "doc_count": 1, - "is_selected": False, - "key": "Subject1", - "label": "Subject1", - }, - ], - "label": "Subject", + "subjects": { + "buckets": [], + "label": "Subjects", }, "type": { "buckets": [ @@ -99,7 +84,6 @@ def test_facets(app, service, identity_simple, records): "label": "Type", }, } - assert expected_aggs == service_aggs @@ -109,22 +93,9 @@ def test_facets_post_filtering_union(app, service, identity_simple, records): res = service.search(identity_simple, facets={"type": ["Foo0", "Foo1"]}) service_aggs = res.aggregations expected_aggs = { - "subject": { - "buckets": [ - { - "doc_count": 1, - "is_selected": False, - "key": "Subject0", - "label": "Subject0", - }, - { - "doc_count": 1, - "is_selected": False, - "key": "Subject1", - "label": "Subject1", - }, - ], - "label": "Subject", + "subjects": { + "buckets": [], + "label": "Subjects", }, "type": { "label": "Type", @@ -174,26 +145,13 @@ def test_facets_post_filtering_intersection(app, service, identity_simple, recor """Different facets should 
result in intersection of results.""" # No records match both facets res = service.search( - identity_simple, facets={"type": ["Foo1"], "subject": ["Subject0"]} + identity_simple, facets={"type": ["Foo1"], "subjects": ["Subject0"]} ) service_aggs = res.aggregations expected_aggs = { - "subject": { - "buckets": [ - { - "doc_count": 1, - "is_selected": True, - "key": "Subject0", - "label": "Subject0", - }, - { - "doc_count": 1, - "is_selected": False, - "key": "Subject1", - "label": "Subject1", - }, - ], - "label": "Subject", + "subjects": { + "buckets": [], + "label": "Subjects", }, "type": { "label": "Type", @@ -244,22 +202,9 @@ def test_facets_post_filtering(app, service, identity_simple, records): res = service.search(identity_simple, facets={"type": ["Foo1"]}) service_aggs = res.aggregations expected_aggs = { - "subject": { - "buckets": [ - { - "doc_count": 1, - "is_selected": False, - "key": "Subject0", - "label": "Subject0", - }, - { - "doc_count": 1, - "is_selected": False, - "key": "Subject1", - "label": "Subject1", - }, - ], - "label": "Subject", + "subjects": { + "buckets": [], + "label": "Subjects", }, "type": { "label": "Type", @@ -303,3 +248,100 @@ def test_facets_post_filtering(app, service, identity_simple, records): # Test hits are filtered assert 1 == len(res) assert set(["001"]) == set([h["metadata"]["title"] for h in res]) + + +def test_combined_terms_facets(app, service, identity_simple, search_clear): + # Create records with nested patterns of interest + data = { + "metadata": { + "title": "SU1 + (SC1, SU2)", + "subjects": [ + {"subject": "SU1"}, # should be ignored in aggregation results + {"scheme": "SC1", "subject": "SU2"}, + ], + # Note that you would typically want to have a mechanism in place to + # auto-fill this field based on the values in "subjects" + "combined_subjects": [ + "SU1", # should be ignored in aggregation results + "SC1::SU2", + ], + }, + } + service.create(identity_simple, data) + data = { + "metadata": { + "title": "(SC1, 
SU2) + (SC2, SU3)", + "subjects": [ + {"scheme": "SC1", "subject": "SU2"}, + {"scheme": "SC2", "subject": "SU3"}, + ], + "combined_subjects": ["SC1::SU2", "SC2::SU3"], + }, + } + service.create(identity_simple, data) + data = { + "metadata": { + "title": "(SC1, SU3)", + "subjects": [ + {"scheme": "SC1", "subject": "SU3"}, + ], + "combined_subjects": ["SC1::SU3"], + }, + } + service.create(identity_simple, data) + Record.index.refresh() + + # Action + res = service.search(identity_simple) + service_aggs = res.aggregations + + expected_aggs = { + "subjects": { + "buckets": [ + { + "doc_count": 3, + "inner": { + "buckets": [ + { + "doc_count": 2, + "is_selected": False, + "key": "SU2", + "label": "SU2", + }, + { + "doc_count": 1, + "is_selected": False, + "key": "SU3", + "label": "SU3", + }, + ] + }, + "is_selected": False, + "key": "SC1", + "label": "SC1", + }, + { + "doc_count": 1, + "inner": { + "buckets": [ + { + "doc_count": 1, + "is_selected": False, + "key": "SU3", + "label": "SU3", + }, + ] + }, + "is_selected": False, + "key": "SC2", + "label": "SC2", + }, + ], + "label": "Subjects", + }, + "type": { + "buckets": [], + "label": "Type", + }, + } + assert expected_aggs == service_aggs