|
13 | 13 | # See the License for the specific language governing permissions and
|
14 | 14 | # limitations under the License.
|
15 | 15 |
|
16 |
| -import hashlib |
17 | 16 | from .serializable_concept import JSONSerializable
|
18 | 17 | from ..commons import logger
|
19 | 18 | from ..retrieval import EbrainsKgQuery
|
| 19 | +from ..retrieval.requests import DECODERS, EbrainsRequest |
20 | 20 | from ..openminds.core.v4.products.datasetVersion import Model as DatasetVersionModel
|
21 | 21 | from ..openminds.base import ConfigBaseModel
|
22 | 22 |
|
| 23 | +import hashlib |
23 | 24 | import re
|
24 | 25 | from datetime import date
|
25 |
| -from typing import List, Optional |
| 26 | +from typing import Any, Dict, List, Optional |
26 | 27 | from pydantic import Field
|
27 | 28 |
|
28 | 29 | class Url(ConfigBaseModel):
|
@@ -185,6 +186,108 @@ def _from_json(cls, spec):
|
185 | 186 | )
|
186 | 187 |
|
187 | 188 |
|
class _EbrainsKgV3Base:
    """Mixin for objects backed by a stored query of the EBRAINS KG v3 API.

    ``QUERY_ID`` is ``None`` here and is meant to be overridden; ``_query``
    additionally requires the concrete class to expose a ``type_id``
    attribute when it is asked for a single instance.
    """

    BASE_URL = "https://core.kg.ebrains.eu/v3-beta/queries"
    QUERY_ID = None
    STAGE = "RELEASED"

    # Trailing run of hex digits and dashes, i.e. the uuid portion at the end
    # of a KG instance URL such as https://kg.ebrains.eu/api/instances/{uuid}.
    _KG_UUID_PATTERN = re.compile(r'[a-f0-9-]+$')

    def __init__(self, _id_spec: Dict[str, Any]) -> None:
        """Remember the identifying spec; the full record is fetched later."""
        self._id_spec = _id_spec
        self._spec = None

    @classmethod
    def _kg_uuid_from_spec(cls, spec: Dict[str, Any]) -> str:
        """Return the instance identifier carried by ``spec``.

        For easy compatibility with data returned by KG: KG no longer uses
        ``@id`` as key but ``id`` (by default), and ``id`` is prefixed with
        domain information, so only its trailing uuid portion is used.
        A non-empty ``@id`` takes precedence and is returned unchanged.

        Raises:
            AssertionError: if neither key yields a usable identifier.
        """
        at_id = spec.get("@id")
        kg_id = spec.get("id")
        found = cls._KG_UUID_PATTERN.search(kg_id) if kg_id else None
        uuid = at_id or (found.group() if found else None)
        if not uuid:
            # Raised explicitly (not via `assert`) so the check survives
            # `python -O`, while keeping the exception type unchanged.
            raise AssertionError(
                "spec must provide a non-empty '@id' or an 'id' ending in a uuid"
            )
        return uuid

    @classmethod
    def _kg_query_url(cls, uuid: Optional[str] = None) -> str:
        """Build the query URL, restricted to one instance if ``uuid`` is given."""
        url = f"{cls.BASE_URL}/{cls.QUERY_ID}/instances?stage={cls.STAGE}"
        if uuid is not None:
            url += f"&instanceId={uuid}"
        return url

    @classmethod
    def _query(cls, spec: Optional[Dict[str, Any]] = None):
        """Run the stored query and return its ``data`` payload.

        Args:
            spec: optional dict identifying a single instance. It must carry
                an identifier (``@id`` or ``id``) and a type (``@type`` or a
                list under ``type``) equal to ``cls.type_id``.

        Returns:
            The list under ``data`` when ``spec`` is ``None``; otherwise the
            single element of that list.

        Raises:
            AssertionError: if ``spec`` fails validation, the response has no
                ``data`` key, or a single-instance query does not report
                exactly one result.
        """
        uuid = None
        if spec is not None:
            uuid = cls._kg_uuid_from_spec(spec)

            if not hasattr(cls, 'type_id'):
                raise AssertionError(f"{cls.__name__} does not define type_id")

            # For easy compatibility with data returned by KG: KG no longer
            # uses @type as key but type, which is a List[str]. A null value
            # is treated like an empty list.
            kg_types = spec.get("type") or []
            type_matches = (
                spec.get("@type") == cls.type_id
                or any(t == cls.type_id for t in kg_types)
            )
            if not type_matches:
                raise AssertionError(
                    f"spec type does not match {cls.__name__}.type_id"
                )

        url = cls._kg_query_url(uuid)
        result = EbrainsRequest(url, DECODERS['.json']).get()

        if 'data' not in result:
            raise AssertionError(f"no 'data' in response from {url}")

        if spec is None:
            return result['data']

        if result.get('total') != 1 or result.get('size') != 1:
            raise AssertionError(
                f"expected exactly one result from {url}, got "
                f"total={result.get('total')}, size={result.get('size')}"
            )
        return result['data'][0]
| 237 | + |
| 238 | + |
class EbrainsKgV3Dataset(Dataset, _EbrainsKgV3Base, type_id="https://openminds.ebrains.eu/core/Dataset"):
    """Dataset built from a complete EBRAINS KG v3 ``Dataset`` record.

    ``_from_json`` resolves the identifying spec through ``_query`` first, so
    the instance is constructed from the record the KG returned.
    """

    BASE_URL = "https://core.kg.ebrains.eu/v3-beta/queries"
    QUERY_ID = "138111f9-1aa4-43f5-8e0a-6e6ed085fa3e"

    def __init__(self, spec: Dict[str, Any]):
        """Initialise from a KG record.

        Args:
            spec: record as returned by the KG; its ``id`` must be a string
                ending in the instance uuid.

        Raises:
            AssertionError: if ``spec`` has no ``id`` ending in a uuid.
        """
        super().__init__(None)
        kg_id = spec.get('id')
        # Guard against a missing / non-string id so that the failure is a
        # clear validation error rather than a TypeError from re.search.
        found = re.search(r'[a-f0-9-]+$', kg_id) if isinstance(kg_id, str) else None
        if found is None:
            # Raised explicitly (not via `assert`) so the check survives
            # `python -O`, while keeping the exception type unchanged.
            raise AssertionError(
                f"cannot extract a uuid from spec['id']: {kg_id!r}"
            )
        self.id = found.group()
        self._description_cached = spec.get("description")
        self._spec = spec

    @classmethod
    def _from_json(cls, spec: Dict[str, Any]):
        """Fetch the record identified by ``spec`` and wrap it in an instance."""
        json_obj = cls._query(spec)
        return cls(json_obj)
| 255 | + |
| 256 | + |
class EbrainsKgV3DatasetVersion(Dataset, _EbrainsKgV3Base, type_id="https://openminds.ebrains.eu/core/DatasetVersion"):
    """Dataset backed by an EBRAINS KG v3 ``DatasetVersion`` record.

    Only the identifying spec is stored at construction; the full record is
    fetched through ``_query`` the first time ``description`` is read.
    """

    BASE_URL = "https://core.kg.ebrains.eu/v3-beta/queries"
    QUERY_ID = "f7489d01-2f90-410c-9812-9ee7d10cc5be"

    def __init__(self, _id_spec: Dict[str, Any]):
        """Store the identifying spec without contacting the KG."""
        _EbrainsKgV3Base.__init__(self, _id_spec)
        Dataset.__init__(self, None)

    @classmethod
    def _from_json(cls, spec: Dict[str, Any]):
        """Create an instance from an identifying spec (no request is made)."""
        return cls(spec)

    @property
    def description(self):
        """Description of this version, or of its parent dataset as fallback.

        Returns:
            The record's own ``description`` when it is neither ``None`` nor
            empty; otherwise the description of the first entry listed under
            ``belongsTo``, or ``None`` when there is no such entry.
        """
        if self._spec is None:
            self._spec = self._query(self._id_spec)

        self._description_cached = self._spec.get("description")

        if self._description_cached is not None and self._description_cached != '':
            return self._description_cached

        # A null "belongsTo" is treated like a missing one.
        parent_datasets = self._spec.get("belongsTo") or []
        if not parent_datasets:
            return None
        if len(parent_datasets) > 1:
            # Logger.warn is a deprecated alias of Logger.warning.
            logger.warning("EbrainsKgV3DatasetVersion.description: more than one parent dataset found. Using the first one...")

        parent = EbrainsKgV3Dataset._from_json(parent_datasets[0])
        return parent.description
| 288 | + |
| 289 | + |
| 290 | + |
188 | 291 | class EbrainsDataset(Dataset, type_id="minds/core/dataset/v1.0.0"):
|
189 | 292 | def __init__(self, id, name, embargo_status=None):
|
190 | 293 | Dataset.__init__(self, id, description=None)
|
|
0 commit comments