Merge pull request #205 from FZJ-INM1-BDA/feat_addDescRatMouse

xgui3783 · web-flow · commit ec279322dedd · 2022-07-20T14:53:12.000+02:00
bugfix: add desc to waxholm & allen
diff --git a/siibra/__init__.py b/siibra/__init__.py
@@ -16,7 +16,7 @@
 from .commons import logger, QUIET, VERBOSE
 
 # __version__ is parsed by setup.py
-__version__ = "0.3a23"
+__version__ = "0.3a24"
 logger.info(f"Version: {__version__}")
 logger.warning("This is a development release. Use at your own risk.")
 logger.info(
diff --git a/siibra/core/datasets.py b/siibra/core/datasets.py
@@ -13,16 +13,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import hashlib
 from .serializable_concept import JSONSerializable
 from ..commons import logger
 from ..retrieval import EbrainsKgQuery
+from ..retrieval.requests import DECODERS, EbrainsRequest
 from ..openminds.core.v4.products.datasetVersion import Model as DatasetVersionModel
 from ..openminds.base import ConfigBaseModel
 
+import hashlib
 import re
 from datetime import date
-from typing import List, Optional
+from typing import Any, Dict, List, Optional
 from pydantic import Field
 
 class Url(ConfigBaseModel):
@@ -185,6 +186,108 @@ def _from_json(cls, spec):
         )
 
 
+class _EbrainsKgV3Base:
+    """Shared lookup logic for objects backed by an EBRAINS KG v3 stored query.
+
+    Subclasses set ``QUERY_ID`` to the stored query to run and must expose a
+    ``type_id`` class attribute (checked in ``_query``).
+    """
+
+    BASE_URL = "https://core.kg.ebrains.eu/v3-beta/queries"
+    QUERY_ID = None
+    STAGE = "RELEASED"
+
+    def __init__(self, _id_spec: Dict[str, Any]) -> None:
+        """Store the identifying spec; the full spec starts out unset (None)."""
+        self._id_spec = _id_spec
+        self._spec = None
+
+    @classmethod
+    def _query(cls, spec: Dict[str, Any] = None):
+        """Run the stored query ``cls.QUERY_ID`` against the KG.
+
+        Args:
+            spec: Optional identifying dict. The id is read from ``"@id"`` or,
+                failing that, from the trailing uuid of ``"id"``. The type is
+                read from ``"@type"`` (str) or ``"type"`` (list of str).
+                When omitted, the query is not restricted to one instance.
+
+        Returns:
+            The single matching entry of the response's ``data`` when ``spec``
+            is given, otherwise the whole ``data`` list.
+
+        Raises:
+            AssertionError: if no id can be derived from ``spec``, if the type
+                in ``spec`` does not match ``cls.type_id``, or if the response
+                does not have the expected shape.
+        """
+
+        # for easy compatibility with data returned by KG
+        # KG no longer uses @id for key, but instead id (by default)
+        # also id prepends domain information (e.g. https://kg.ebrains.eu/api/instances/{uuid})
+        # therefore, also extract the uuid portion
+        uuid = None
+        if spec is not None:
+            at_id = spec.get("@id")
+            kg_id = spec.get("id")
+            kg_id_search = re.search(r'[a-f0-9-]+$', kg_id) if kg_id else None
+
+            # Truthiness (not "is not None") so an empty id cannot slip through
+            # and fail later with an unrelated AttributeError.
+            uuid = at_id or (kg_id_search.group() if kg_id_search else None)
+            assert uuid, "spec must provide '@id' or an 'id' ending in a uuid"
+
+        assert hasattr(cls, 'type_id')
+
+        if spec is not None:
+            # for easy compatibility with data returned by KG
+            # KG no longer uses @type for key, but type
+            # also type is List[str]
+            # Only checked when a spec is given: spec=None is the documented
+            # default and previously crashed here on ``None.get``.
+            assert spec.get("@type") == cls.type_id or any(
+                t == cls.type_id for t in spec.get("type", [])
+            )
+
+        url = f"{cls.BASE_URL}/{cls.QUERY_ID}/instances?stage={cls.STAGE}"
+        if spec is not None:
+            url += f"&instanceId={uuid}"
+
+        result = EbrainsRequest(url, DECODERS['.json']).get()
+
+        assert 'data' in result
+
+        if spec is not None:
+            # A lookup by instance id must yield exactly one hit.
+            assert result.get('total') == 1
+            assert result.get('size') == 1
+            return result.get("data")[0]
+
+        return result.get('data', [])
+        
+
+class EbrainsKgV3Dataset(
+    Dataset,
+    _EbrainsKgV3Base,
+    type_id="https://openminds.ebrains.eu/core/Dataset",
+):
+    """Dataset built from a full KG v3 ``Dataset`` query result."""
+
+    BASE_URL = "https://core.kg.ebrains.eu/v3-beta/queries"
+    QUERY_ID = "138111f9-1aa4-43f5-8e0a-6e6ed085fa3e"
+
+    def __init__(self, spec: Dict[str, Any]):
+        """Initialise from ``spec``, a dict carrying ``id`` and optionally ``description``.
+
+        The object id is the trailing run of hex digits and hyphens of
+        ``spec["id"]``; an assertion fails if there is none.
+        """
+        super().__init__(None)
+        uuid_match = re.search(r'[a-f0-9-]+$', spec.get('id'))
+        assert uuid_match
+        self.id = uuid_match.group()
+        self._description_cached = spec.get("description")
+        self._spec = spec
+
+    @classmethod
+    def _from_json(cls, spec: Dict[str, Any]):
+        """Fetch the full record for ``spec`` via ``_query`` and wrap it in an instance."""
+        return cls(cls._query(spec))
+
+
+class EbrainsKgV3DatasetVersion(Dataset, _EbrainsKgV3Base, type_id="https://openminds.ebrains.eu/core/DatasetVersion"):
+    """Dataset version whose KG record is only fetched when ``description`` is read."""
+
+    BASE_URL = "https://core.kg.ebrains.eu/v3-beta/queries"
+    QUERY_ID = "f7489d01-2f90-410c-9812-9ee7d10cc5be"
+
+    def __init__(self, _id_spec: Dict[str, Any]):
+        """Store the identifying spec; no query is issued here."""
+        _EbrainsKgV3Base.__init__(self, _id_spec)
+        Dataset.__init__(self, None)
+
+    @classmethod
+    def _from_json(cls, spec: Dict[str, Any]):
+        """Build an instance from the identifying ``spec`` without querying."""
+        return cls(spec)
+
+    @property
+    def description(self):
+        """Description of this version, falling back to its parent dataset.
+
+        The record is queried on first access and kept in ``self._spec``.
+        If its ``description`` is missing or empty, the description of the
+        first entry in ``belongsTo`` is returned instead.
+
+        Returns:
+            The description, or None when the record has neither a
+            description nor a parent dataset.
+        """
+        if self._spec is None:
+            self._spec = self._query(self._id_spec)
+
+        self._description_cached = self._spec.get("description")
+
+        if self._description_cached is not None and self._description_cached != '':
+            return self._description_cached
+
+        # "or []" also covers a belongsTo key that is present but null.
+        parent_datasets = self._spec.get("belongsTo") or []
+        if not parent_datasets:
+            return None
+        if len(parent_datasets) > 1:
+            # logger.warning, not the deprecated logger.warn alias.
+            logger.warning("EbrainsKgV3DatasetVersion.description: more than one parent dataset found. Using the first one...")
+
+        parent = EbrainsKgV3Dataset._from_json(parent_datasets[0])
+        return parent.description
+
+
+
 class EbrainsDataset(Dataset, type_id="minds/core/dataset/v1.0.0"):
     def __init__(self, id, name, embargo_status=None):
         Dataset.__init__(self, id, description=None)
diff --git a/siibra/core/parcellation.py b/siibra/core/parcellation.py
@@ -17,7 +17,7 @@
 from .region import Region
 from .concept import AtlasConcept, provide_registry
 from .serializable_concept import JSONSerializable
-from .datasets import DatasetJsonModel, OriginDescription, EbrainsDataset
+from .datasets import DatasetJsonModel, OriginDescription, EbrainsDataset, EbrainsKgV3DatasetVersion, EbrainsKgV3Dataset
 
 from ..commons import logger, MapType, ParcellationIndex, Registry
 from ..volumes import ParcellationMap
@@ -208,14 +208,32 @@ def __init__(
         """
         AtlasConcept.__init__(self, identifier, name, dataset_specs)
         self.version = version
-        self.description = ""
+        self._description = ""
         self.modality = modality
         self._regiondefs = regiondefs
         if maps is not None:
             if self._datasets_cached is None:
                 self._datasets_cached = []
             self._datasets_cached.extend(maps)
         self.atlases = set()
+    
+    @property
+    def description(self):
+        """Description taken from the first attached metadata dataset.
+
+        Falls back to the locally stored ``_description`` when ``self.infos``
+        holds no EBRAINS dataset or origin description.
+        """
+        metadata_types = (
+            EbrainsDataset,
+            EbrainsKgV3DatasetVersion,
+            EbrainsKgV3Dataset,
+            OriginDescription,
+        )
+        candidates = [
+            info for info in self.infos if isinstance(info, metadata_types)
+        ]
+
+        if not candidates:
+            return self._description
+
+        if len(candidates) > 1:
+            logger.debug(f"Parcellation.description multiple metadata_datasets found. Using the first one.")
+
+        return candidates[0].description
 
     @property
     def regiontree(self):
@@ -560,7 +578,7 @@ def _from_json(cls, obj):
             result.modality = obj["modality"]
 
         if "description" in obj:
-            result.description = obj["description"]
+            result._description = obj["description"]
 
         if "publications" in obj:
             result._publications = obj["publications"]
diff --git a/test/core/test_datasets.py b/test/core/test_datasets.py
@@ -0,0 +1,58 @@
+from unittest import TestCase, mock, main as run_test
+from siibra.core.datasets import EbrainsKgV3DatasetVersion, _EbrainsKgV3Base, EbrainsKgV3Dataset
+
+DATASET_VERSION_TYPE = "https://openminds.ebrains.eu/core/DatasetVersion"
+DATASET_VERSION_ID = "fadcd2cb-9e8b-4e01-9777-f4d4df8f1ebc"
+
+DATASET_TYPE = ["https://openminds.ebrains.eu/core/Dataset"]
+DATASTE_ID = "https://kg.ebrains.eu/api/instances/82f91c95-6799-485a-ab9a-010c75f9e790"
+
+class TestEbrainsKgV3DatasetVersion(TestCase):
+    """Tests for the lazy ``description`` lookup of EbrainsKgV3DatasetVersion."""
+
+    @staticmethod
+    def _make_dataset_version():
+        """Build a dataset version from the ``@id``/``@type`` spec used by every test."""
+        return EbrainsKgV3DatasetVersion({
+            '@id': DATASET_VERSION_ID,
+            '@type': DATASET_VERSION_TYPE
+        })
+
+    def test_lazy_on_init(self):
+        # Constructing the object must not hit the KG.
+        with mock.patch.object(_EbrainsKgV3Base, '_query') as mock_query:
+            self._make_dataset_version()
+            assert not mock_query.called
+
+    def test_on_try_desc_called(self):
+        # Reading the description triggers the query.
+        with mock.patch.object(_EbrainsKgV3Base, '_query') as mock_query:
+            self._make_dataset_version().description
+            assert mock_query.called
+
+    def test_return_desc_if_exists(self):
+        # A non-empty description means the parent dataset is never consulted.
+        with mock.patch.object(EbrainsKgV3Dataset, '_from_json') as mock_parent_json, \
+                mock.patch.object(_EbrainsKgV3Base, '_query') as mock_query:
+            mock_query.return_value = {
+                'description': 'foo-bar'
+            }
+            self._make_dataset_version().description
+            assert not mock_parent_json.called
+
+    def test_fallback_to_parent_if_null_desc(self):
+        # An empty description falls back to the first parent dataset.
+        with mock.patch.object(EbrainsKgV3Dataset, '_from_json') as mock_parent_json, \
+                mock.patch.object(_EbrainsKgV3Base, '_query') as mock_query:
+            mock_query.return_value = {
+                'description': '',
+                'belongsTo': [{
+                    "type": DATASET_TYPE,
+                    "id": DATASTE_ID
+                }]
+            }
+            self._make_dataset_version().description
+            assert mock_parent_json.called
+
+
+if __name__ == "__main__":
+    run_test()
diff --git a/test/core/test_parcellation.py b/test/core/test_parcellation.py
@@ -114,3 +114,28 @@ def test_should_have_ebrains_doi(atlas_id,parc_id):
 
 if __name__ == "__main__":
     unittest.main()
+
+
+# (atlas key, parcellation key) pairs used to parametrize test_should_have_desc.
+parc_has_desc = [
+    ("human", "julich brain 2.9"),
+
+    ("rat", "v4"),
+    ("rat", "v3"),
+    ("rat", "v2"),
+    ("rat", "v1"),
+
+    ("mouse", "2015"),
+    ("mouse", "2017"),
+]
+
+@pytest.mark.parametrize("atlas_id,parc_id", parc_has_desc)
+def test_should_have_desc(atlas_id, parc_id):
+    """Every brain atlas version of the parcellation model has a description longer than 20 characters."""
+
+    atlas = siibra.atlases[atlas_id]
+    parc = atlas.parcellations[parc_id]
+    model = parc.to_model()
+
+    # The result of all() must be asserted; a bare all(...) expression is
+    # evaluated and discarded, so the test could never fail.
+    assert all(
+        len(ver.description) > 20
+        for ver in model.brain_atlas_versions
+    )