1
1
from pathlib import Path
2
- from rdflib import Graph , URIRef , RDFS
2
+ from rdflib import Graph , URIRef , RDFS , RDF , OWL
3
3
from abi import logger
4
4
import json
5
5
6
6
def consolidate_ontologies (ontologies_dir : str , output_file : str , mapping_file : str ) -> None :
7
7
"""Consolidate all ontology files and save URI-label mapping to Python file.
8
+ Only keeps classes, data properties, object properties, and annotation properties.
8
9
9
10
Args:
10
11
ontologies_dir (str): Path to directory containing ontology files
@@ -13,6 +14,12 @@ def consolidate_ontologies(ontologies_dir: str, output_file: str, mapping_file:
13
14
"""
14
15
# Create consolidated graph
15
16
consolidated = Graph ()
17
+ filtered = Graph ()
18
+
19
+ # Bind the cco, abi, and bfo namespace prefixes on the filtered graph
20
+ filtered .bind ('cco' , 'https://www.commoncoreontologies.org/' )
21
+ filtered .bind ('abi' , 'http://ontology.naas.ai/abi/' )
22
+ filtered .bind ('bfo' , 'http://purl.obolibrary.org/obo/' )
16
23
17
24
# Get all .ttl files recursively
18
25
ontologies_path = Path (ontologies_dir )
@@ -31,16 +38,35 @@ def consolidate_ontologies(ontologies_dir: str, output_file: str, mapping_file:
31
38
consolidated += g
32
39
except Exception as e :
33
40
logger .error (f"Error loading { ttl_file } : { e } " )
41
+
42
+ # Filter for desired types
43
+ desired_types = {
44
+ OWL .Class ,
45
+ OWL .DatatypeProperty ,
46
+ OWL .ObjectProperty ,
47
+ OWL .AnnotationProperty
48
+ }
49
+
50
+ # Add all triples where subject is of desired type
51
+ for s , p , o in consolidated .triples ((None , RDF .type , None )):
52
+ if o in desired_types :
53
+ # Add the type triple
54
+ filtered .add ((s , p , o ))
55
+ # Add all triples where this resource appears as subject or as object
56
+ for s2 , p2 , o2 in consolidated .triples ((s , None , None )):
57
+ filtered .add ((s2 , p2 , o2 ))
58
+ for s2 , p2 , o2 in consolidated .triples ((None , None , s )):
59
+ filtered .add ((s2 , p2 , o2 ))
34
60
35
- # Save consolidated ontology and mapping
61
+ # Save filtered ontology and mapping
36
62
try :
37
- consolidated .serialize (destination = output_file , format = "turtle" )
38
- logger .info (f"Saved consolidated ontology to { output_file } " )
39
- logger .info (f"Total triples: { len (consolidated )} " )
63
+ filtered .serialize (destination = output_file , format = "turtle" )
64
+ logger .info (f"Saved filtered ontology to { output_file } " )
65
+ logger .info (f"Total triples: { len (filtered )} " )
40
66
41
67
# Create URI-label mapping
42
68
mapping = {}
43
- for s , p , o in consolidated .triples ((None , RDFS .label , None )):
69
+ for s , p , o in filtered .triples ((None , RDFS .label , None )):
44
70
if isinstance (s , URIRef ):
45
71
mapping [str (s )] = str (o )
46
72
@@ -58,8 +84,7 @@ def consolidate_ontologies(ontologies_dir: str, output_file: str, mapping_file:
58
84
59
85
if __name__ == "__main__" :
60
86
# Define paths relative to project root
61
- project_root = Path (__file__ ).parent .parent .parent
62
- ontologies_dir = project_root / "src" / "ontologies"
87
+ ontologies_dir = Path (__file__ ).parent
63
88
output_file = ontologies_dir / "ConsolidatedOntology.ttl"
64
89
mapping_file = ontologies_dir / "mapping.py"
65
90
0 commit comments