Skip to content

Commit d1260a0

Browse files
authored
Merge pull request #110 from FZJ-INM1-BDA/bugfix_ebrainsDatasetSpecies
bugfix: filter dataset by species
2 parents 2c43ae0 + 71baf53 commit d1260a0

File tree

9 files changed

+170
-24
lines changed

9 files changed

+170
-24
lines changed

pytest.ini

+2-5
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
11
[pytest]
2-
addopts = --maxfail=1 -rf
2+
addopts = -rf
33
testpaths =
4-
test/core
5-
test/retrieval
6-
test/volumes
7-
test/features
4+
test/

siibra/core/atlas.py

+32-2
Original file line numberDiff line numberDiff line change
@@ -32,20 +32,50 @@ class Atlas(
3232
spaces, as well as common functionalities of those.
3333
"""
3434

35-
def __init__(self, identifier, name):
35+
@staticmethod
def get_species_data(species_str: str):
    """
    Translate a short species keyword into its species descriptor.

    Parameters
    ----------
    species_str : str
        One of 'human', 'rat', 'mouse' or 'monkey' (compared by equality).

    Returns
    -------
    dict
        A newly created dict with the keys '@id' and 'name'.

    Raises
    ------
    ValueError
        If species_str equals none of the known keywords.
    """
    # (keyword, species @id, species name) - checked in this order.
    known_species = (
        (
            'human',
            'https://nexus.humanbrainproject.org/v0/data/minds/core/species/v1.0.0/0ea4e6ba-2681-4f7d-9fa9-49b915caaac9',
            'Homo sapiens',
        ),
        (
            'rat',
            'https://nexus.humanbrainproject.org/v0/data/minds/core/species/v1.0.0/f3490d7f-8f7f-4b40-b238-963dcac84412',
            'Rattus norvegicus',
        ),
        (
            'mouse',
            'https://nexus.humanbrainproject.org/v0/data/minds/core/species/v1.0.0/cfc1656c-67d1-4d2c-a17e-efd7ce0df88c',
            'Mus musculus',
        ),
        # TODO the monkey entry may not be correct. Wait for feedback and
        # replace it with a more accurate species record.
        (
            'monkey',
            'https://nexus.humanbrainproject.org/v0/data/minds/core/species/v1.0.0/3f75b0ad-dbcd-464e-b614-499a1b9ae86b',
            'Primates',
        ),
    )

    for keyword, species_id, species_name in known_species:
        if species_str == keyword:
            return {'@id': species_id, 'name': species_name}

    raise ValueError(f'species with spec {species_str} cannot be decoded')
60+
61+
def __init__(self, identifier, name, species=None):
    """
    Construct an empty atlas object with a name and identifier.

    Parameters
    ----------
    identifier : forwarded unchanged to AtlasConcept.__init__
    name : forwarded unchanged to AtlasConcept.__init__
    species : optional species keyword
        When given, it is decoded with get_species_data() and the result
        is stored as self.species. When None, no species attribute is
        set on the instance.
    """
    AtlasConcept.__init__(self, identifier, name, dataset_specs=[])

    # Both collections start empty; _register_parcellation and
    # _register_space append to them.
    self._parcellations = []
    self._spaces = []

    if species is None:
        return
    self.species = self.get_species_data(species)
4270

4371
def _register_space(self, space):
    """
    Register another reference space with this atlas.

    The atlas is first added to the space's `atlases` collection, then
    the space is appended to this atlas' list of spaces.
    """
    linked_atlases = space.atlases
    linked_atlases.add(self)
    self._spaces.append(space)
4675

4776
def _register_parcellation(self, parcellation):
    """
    Register another parcellation with this atlas.

    The atlas is first added to the parcellation's `atlases` collection,
    then the parcellation is appended to this atlas' list of
    parcellations.
    """
    linked_atlases = parcellation.atlases
    linked_atlases.add(self)
    self._parcellations.append(parcellation)
5080

5181
@property
@@ -75,7 +105,7 @@ def _from_json(cls, obj):
75105
f"{cls.__name__} construction attempt from invalid json format (@type={obj.get('@type')}"
76106
)
77107
if all(["@id" in obj, "spaces" in obj, "parcellations" in obj]):
78-
atlas = cls(obj["@id"], obj["name"])
108+
atlas = cls(obj["@id"], obj["name"], species=obj["species"])
79109
for space_id in obj["spaces"]:
80110
if not Space.REGISTRY.provides(space_id):
81111
raise ValueError(

siibra/core/parcellation.py

+1
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,7 @@ def __init__(
156156
if self._datasets_cached is None:
157157
self._datasets_cached = []
158158
self._datasets_cached.extend(maps)
159+
self.atlases = set()
159160

160161
@property
161162
def regiontree(self):

siibra/core/space.py

+1
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ def __init__(
5151
AtlasConcept.__init__(self, identifier, name, dataset_specs)
5252
self.src_volume_type = src_volume_type
5353
self.type = template_type
54+
self.atlases = set()
5455

5556
def get_template(self):
5657
"""

siibra/features/connectivity.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -128,9 +128,9 @@ class ConnectivityProfile(RegionalFeature):
128128

129129
show_as_log = True
130130

131-
def __init__(self, regionspec: str, connectivitymatrix: ConnectivityMatrix, index):
131+
def __init__(self, regionspec: str, connectivitymatrix: ConnectivityMatrix, index, **kwargs):
132132
assert regionspec is not None
133-
RegionalFeature.__init__(self, regionspec)
133+
RegionalFeature.__init__(self, regionspec, **kwargs)
134134
self._matrix_index = index
135135
self._matrix = connectivitymatrix
136136

@@ -184,13 +184,14 @@ def __init__(self, **kwargs):
184184
for _, loader in self._QUERY.get_loaders("connectivity", ".json"):
185185
cm = ConnectivityMatrix._from_json(loader.data)
186186
for parcellation in cm.parcellations:
187+
species = [atlas.species for atlas in parcellation.atlases]
187188
for regionname in cm.regionnames:
188189
region = parcellation.decode_region(regionname, build_group=False)
189190
if region is None:
190191
raise RuntimeError(
191192
f"Could not decode region name {regionname} in {parcellation}"
192193
)
193-
self.register(ConnectivityProfile(region, cm, regionname))
194+
self.register(ConnectivityProfile(region, cm, regionname, species=species))
194195

195196

196197
class ConnectivityMatrixQuery(FeatureQuery):

siibra/features/ebrains.py

+33-6
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@
2222

2323

2424
class EbrainsRegionalDataset(RegionalFeature, EbrainsDataset):
25-
def __init__(self, regionspec, kg_id, name, embargo_status):
26-
RegionalFeature.__init__(self, regionspec)
25+
def __init__(self, regionspec, kg_id, name, embargo_status, species=None):
    """
    Construct a regional EBRAINS dataset feature.

    Parameters
    ----------
    regionspec : forwarded to RegionalFeature.__init__
    kg_id : forwarded to EbrainsDataset.__init__
    name : forwarded to EbrainsDataset.__init__
    embargo_status : forwarded to EbrainsDataset.__init__
    species : list, optional
        Species entries forwarded to RegionalFeature.__init__.
        Omitting it (or passing None) results in a new empty list.
    """
    # A `species=[]` default would be created once and then handed to
    # every instance constructed without species, so a mutation through
    # one feature would leak into all others. Build a fresh list instead.
    RegionalFeature.__init__(self, regionspec, [] if species is None else species)
    EbrainsDataset.__init__(self, kg_id, name, embargo_status)
2828

2929
@property
@@ -46,17 +46,26 @@ class EbrainsRegionalFeatureQuery(FeatureQuery):
4646
_FEATURETYPE = EbrainsRegionalDataset
4747

4848
def __init__(self, **kwargs):
49-
5049
FeatureQuery.__init__(self)
5150

5251
loader = EbrainsRequest(
53-
query_id="siibra-kg-feature-summary-0.0.1",
52+
query_id="siibra-kg-feature-summary-0_0_4",
5453
schema="parcellationregion",
5554
params={"vocab": "https://schema.hbp.eu/myQuery/"},
5655
)
5756

5857
for r in loader.data.get("results", []):
58+
59+
species_alt = []
60+
# List, keys @id, name
5961
for dataset in r.get("datasets", []):
62+
species_alt = [
63+
*species_alt,
64+
*dataset.get('ds_specimengroup_subject_species', []),
65+
*dataset.get('s_subject_species', []),
66+
]
67+
for dataset in r.get("datasets", []):
68+
6069
ds_id = dataset.get("@id")
6170
ds_name = dataset.get("name")
6271
ds_embargo_status = dataset.get("embargo_status")
@@ -65,10 +74,28 @@ def __init__(self, **kwargs):
6574
f"'{ds_name}' is not an interpretable dataset and will be skipped.\n(id:{ds_id})"
6675
)
6776
continue
68-
regionname = r.get("name", None)
77+
regionname: str = r.get("name", None)
78+
alias: str = r.get("alias", None)
79+
80+
# species defined for the current dataset
81+
dataset_species = [
82+
*dataset.get('ds_specimengroup_subject_species', []),
83+
*dataset.get('s_subject_species', []),
84+
]
85+
86+
# if the current dataset has species defined, use the current species, else use the general species
87+
species = [*r.get("species", []), *(dataset_species if dataset_species else species_alt)] # list with keys @id, identifier, name
88+
89+
# filter species by @id attribute
90+
unique_species = []
91+
for sp in species:
92+
if sp.get('@id') in [s.get('@id') for s in unique_species]:
93+
continue
94+
unique_species.append(sp)
95+
6996
self.register(
7097
EbrainsRegionalDataset(
71-
regionname, ds_id, ds_name, ds_embargo_status
98+
alias or regionname, ds_id, ds_name, ds_embargo_status, unique_species
7299
)
73100
)
74101

siibra/features/feature.py

+27-1
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ class RegionalFeature(Feature):
181181
TODO store region as an object that has a link to the parcellation
182182
"""
183183

184-
def __init__(self, regionspec: Tuple[str, Region]):
184+
def __init__(self, regionspec: Tuple[str, Region], species = [], **kwargs):
185185
"""
186186
Parameters
187187
----------
@@ -194,6 +194,11 @@ def __init__(self, regionspec: Tuple[str, Region]):
194194
)
195195
Feature.__init__(self)
196196
self.regionspec = regionspec
197+
self.species = species
198+
199+
@property
def species_ids(self):
    """
    The '@id' value of every entry in self.species, in order.

    Entries without an '@id' key contribute None.
    """
    collected = []
    for entry in self.species:
        collected.append(entry.get('@id'))
    return collected
197202

198203
def match(self, concept):
199204
"""
@@ -208,6 +213,24 @@ def match(self, concept):
208213
-------
209214
True, if match was successful, otherwise False
210215
"""
216+
217+
# first check whether any atlas of the given concept has a species listed for this feature
218+
try:
219+
if isinstance(concept, Region):
220+
atlases = concept.parcellation.atlases
221+
if isinstance(concept, Parcellation):
222+
atlases = concept.atlases
223+
if isinstance(concept, Atlas):
224+
atlases = {concept}
225+
if atlases:
226+
# if none of the concept's atlases has a species '@id' contained in self.species_ids
227+
# (this is also the case when self.species is empty), return False
228+
if all(atlas.species.get('@id') not in self.species_ids for atlas in atlases):
229+
return False
230+
# for backwards compatibility. If any attr is not found, pass
231+
except AttributeError:
232+
pass
233+
211234
self._match = None
212235

213236
# regionspec might be a specific region, then we can
@@ -227,13 +250,15 @@ def match(self, concept):
227250
for w in concept.key.split('_'):
228251
spec = spec.replace(w.lower(), '')
229252
for match in concept.regiontree.find(spec):
253+
# TODO what's with the mutation here?
230254
self._match = match
231255
return True
232256

233257
elif isinstance(concept, Region):
234258
for w in concept.parcellation.key.split('_'):
235259
spec = spec.replace(w.lower(), '')
236260
for match in concept.find(spec):
261+
# TODO what's with the mutation here?
237262
self._match = match
238263
return True
239264

@@ -245,6 +270,7 @@ def match(self, concept):
245270
spec = spec.replace(w.lower(), '')
246271
for p in concept.parcellations:
247272
for match in p.regiontree.find(spec):
273+
# TODO what's with the mutation here?
248274
self._match = match
249275
return True
250276
else:

siibra/features/receptors.py

+11-6
Original file line numberDiff line numberDiff line change
@@ -357,9 +357,9 @@ class ReceptorDistribution(RegionalFeature, EbrainsDataset):
357357
TODO lazy loading could be more elegant.
358358
"""
359359

360-
def __init__(self, region, kg_result):
360+
def __init__(self, region, kg_result, **kwargs):
361361

362-
RegionalFeature.__init__(self, region)
362+
RegionalFeature.__init__(self, region, **kwargs)
363363
EbrainsDataset.__init__(self, kg_result["identifier"], kg_result["name"])
364364

365365
self.info = kg_result["description"]
@@ -488,13 +488,18 @@ class ReceptorQuery(FeatureQuery):
488488

489489
def __init__(self,**kwargs):
490490
FeatureQuery.__init__(self)
491-
kg_query = EbrainsRequest(query_id="siibra_receptor_densities").get()
492-
# kg_query = ebrains.execute_query_by_id('minds', 'core', 'dataset', 'v1.0.0', )
491+
kg_req = EbrainsRequest(
492+
query_id="siibra_receptor_densities-0_0_2",
493+
params={'vocab': 'https://schema.hbp.eu/myQuery/' }
494+
)
495+
kg_query = kg_req.get()
496+
493497
not_used = 0
494498
for kg_result in kg_query["results"]:
495-
region_names = [e["name"] for e in kg_result["region"]]
499+
region_names = [p_region["name"] for p_region in kg_result["parcellationRegion"]]
500+
species = kg_result.get('species', [])
496501
for region_name in region_names:
497-
f = ReceptorDistribution(region_name, kg_result)
502+
f = ReceptorDistribution(region_name, kg_result, species=species)
498503
if f.fingerprint is None:
499504
not_used += 1
500505
else:

test/features/test_ebrainsquery.py

+59-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
1+
from typing import List
12
import unittest
23
import siibra
3-
4+
import pytest
5+
from siibra.core import Parcellation, Atlas, Region
6+
from siibra.features.feature import Feature
47

58
class TestEbrainsQuery(unittest.TestCase):
69
@classmethod
@@ -17,5 +20,60 @@ def test_no_duplicates_returned(self):
1720
assert len(self.feat) == len(list(set(ids)))
1821

1922

23+
parameter = [
24+
('rat', 'v3', 'neocortex', {
25+
'exclude': [
26+
# buggy. hippocampus + difumo 512 is a big issue
27+
'DiFuMo atlas (512 dimensions)'
28+
],
29+
'include': [
30+
# some of these still clearly don't look right
31+
# for e.g. some v1/v2 are in here
32+
# but one step at a time...
33+
'Large scale multi-channel EEG in rats',
34+
'Wistar rat hippocampus CA1 pyramidal cell morphologies – Extension with additional reconstructions',
35+
'Auditory stimulation during the sleep-wake cycle in the freely moving rat',
36+
'3D high resolution SRXTM image data of cortical vasculature of rat brain.',
37+
'Density measurements of different receptors for CA1 (Hippocampus) [rat, v2.0]',
38+
'Visualization of projections from insular cortex in rat with the use of anterograde tracers',
39+
'Density measurements of different receptors for CA, stratum moleculare (Hippocampus) [rat, v2.0]',
40+
'Density measurements of different receptors for CA2 (Hippocampus) [rat, v2.0]',
41+
'PCI-like measure in rodents',
42+
'Density measurements of different receptors for CA3 (Hippocampus) [rat, v2.0]',
43+
'Dose-dependent effects of ketamine on spontaneous and evoked EEG activity in rats',
44+
'Detailed dynamical laminar organisation in different cortical areas (in rats in vivo)',
45+
'Density measurements of different receptors for CA, stratum cellulare (Hippocampus) [rat, v2.0]',
46+
'3D reconstructions of pyramidal cells in rat hippocampal CA1 region',
47+
'Electrophysiological data of cortical layer 6 neurons and synaptically coupled neuronal pairs',
48+
'Density measurements of different receptors for DG (Hippocampus) [rat, v1.0]',
49+
'Test of consciousness metrics in rodents',
50+
'Morphological data of cortical layer 6 neurons and synaptically coupled neuronal pairs',
51+
'Visualization of projections from posterior parietal cortex in rat with the use of anterograde tracers',
52+
'Immunofluorescence data of cortical layer 6 neurons',
53+
'Density measurements of different receptors for DG (Hippocampus) [rat, v2.0]',
54+
'Graphical representation of rat cortical vasculature reconstructed from high resolution 3D SRXTM data.',
55+
'Density measurements of different receptors for CA, stratum cellulare (Hippocampus) [rat, v1.0]',
56+
'Wistar rat hippocampus CA1 pyramidal cell morphologies',
57+
'Density measurements of different receptors for CA3 (Hippocampus) [rat, v1.0]',
58+
'Density measurements of different receptors for CA1 (Hippocampus) [rat, v1.0]',
59+
'Density measurements of different receptors for CA, stratum moleculare (Hippocampus) [rat, v1.0]',
60+
'Density measurements of different receptors for CA2 (Hippocampus) [rat, v1.0]',
61+
'Multi-area recordings from visual and somatosensory cortices, perirhinal cortex and hippocampal CA1']
62+
})
63+
]
64+
65+
@pytest.mark.parametrize('atlas_id,parc_id,region_id,inc_exc', parameter)
def test_species(atlas_id, parc_id, region_id, inc_exc):
    """
    Check the names of the 'ebrains' features returned for a region.

    Every name listed under 'include' must be among the returned feature
    names, and no name listed under 'exclude' may be among them.
    """
    atlas: Atlas = siibra.atlases[atlas_id]
    parc: Parcellation = atlas.parcellations[parc_id]
    region: Region = parc.decode_region(region_id)
    features: List[Feature] = siibra.get_features(region, 'ebrains')
    feature_names = {f.name for f in features}

    # A parameter set may omit either key; treat that as "nothing to check"
    # instead of failing with a TypeError when iterating None.
    excludes: List[str] = inc_exc.get('exclude') or []
    includes: List[str] = inc_exc.get('include') or []

    # Collect the offending names so a failure names the datasets involved.
    unexpected = [name for name in excludes if name in feature_names]
    missing = [name for name in includes if name not in feature_names]

    assert not unexpected, f"features that should have been filtered out: {unexpected}"
    assert not missing, f"expected features that were not returned: {missing}"
77+
2078
if __name__ == "__main__":
2179
unittest.main()

0 commit comments

Comments
 (0)