Skip to content

Commit b39d677

Browse files
add hpo queries notebook
1 parent 8414bcc commit b39d677

File tree

1 file changed

+136
-0
lines changed

1 file changed

+136
-0
lines changed

notebooks/hpo.ipynb

+136
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 11,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"import sys\n",
10+
"import os\n",
11+
"from py2neo import Graph\n",
12+
"from pathlib import Path\n",
13+
"from pandas import DataFrame\n",
14+
"\n",
15+
"graph = Graph(\"bolt://localhost:8687\")\n",
16+
"\n",
17+
"# df = DataFrame(graph.run(\"\"\"\n",
18+
"# MATCH (id:Id { id:\"chebi:5063\"})<-[:id]-(n:GraphNode)\n",
19+
"# RETURN n.`grebi:name`[0] as name\n",
20+
"# \"\"\").data())\n",
21+
"\n",
22+
"# print(df)\n",
23+
"\n",
24+
"\n",
25+
"# Returns all outgoing edges of all HP terms. Each term node definitely corresponds to an HP term, but may ALSO correspond to an MP term due to the merged mappings.\n",
26+
"# This also means that the returned relationships may have been asserted by either HP or MP.\n",
27+
"#\n",
28+
"df = DataFrame(graph.run(\"\"\"\n",
29+
"MATCH (id:Id { id:\"hp:0000001\"})<-[:id]-(hpo_root_term:GraphNode)\n",
30+
" <-[:`biolink:broad_match`]-(term:GraphNode)\n",
31+
" -[outgoing_edge]->(n:GraphNode)\n",
32+
"RETURN term.`grebi:name`[0] AS from, type(outgoing_edge) AS edge, n.`grebi:name`[0] AS to\n",
33+
"\"\"\").data())\n",
34+
"\n",
35+
"df.to_csv(\"all_hp_all_out.csv\", index=False)"
36+
]
37+
},
38+
{
39+
"cell_type": "code",
40+
"execution_count": 12,
41+
"metadata": {},
42+
"outputs": [],
43+
"source": [
44+
"\n",
45+
"\n",
46+
"# This version of the above query filters the relationships to those asserted by HP only (not MP)\n",
47+
"df = DataFrame(graph.run(\"\"\"\n",
48+
"MATCH (id:Id { id:\"hp:0000001\"})<-[:id]-(hpo_root_term:GraphNode)\n",
49+
" <-[:`biolink:broad_match`]-(term:GraphNode)\n",
50+
" -[outgoing_edge]->(n:GraphNode)\n",
51+
" WHERE \"OLS.hp\" IN outgoing_edge.`grebi:datasources`\n",
52+
"RETURN term.`grebi:name`[0] AS from, type(outgoing_edge) AS edge, n.`grebi:name`[0] AS to\n",
53+
"\"\"\").data())\n",
54+
"\n",
55+
"df.to_csv(\"all_hp_all_out_hp_only.csv\", index=False)"
56+
]
57+
},
58+
{
59+
"cell_type": "code",
60+
"execution_count": 14,
61+
"metadata": {},
62+
"outputs": [],
63+
"source": [
64+
"\n",
65+
"\n",
66+
"# This version of the above query filters the relationships to those asserted by HP only (not MP)\n",
67+
"# It also adds identifiers to the results\n",
68+
"\n",
69+
"df = DataFrame(graph.run(\"\"\"\n",
70+
"MATCH (id:Id { id:\"hp:0000001\"})<-[:id]-(hpo_root_term:GraphNode)\n",
71+
" <-[:`biolink:broad_match`]-(term:GraphNode)\n",
72+
" -[outgoing_edge]->(n:GraphNode)\n",
73+
" WHERE \"OLS.hp\" IN outgoing_edge.`grebi:datasources`\n",
74+
"RETURN\n",
75+
" [id in term.id WHERE id =~ \"hp:[0-9]*\" | id][0] AS from_id,\n",
76+
" term.`grebi:name`[0] AS from_label,\n",
77+
" type(outgoing_edge) AS edge,\n",
78+
" n.id AS to_ids,\n",
79+
" n.`grebi:name`[0] AS to_label\n",
80+
"\"\"\").data())\n",
81+
"\n",
82+
"df.to_csv(\"all_hp_all_out_hp_outgoing.csv\", index=False)\n",
83+
"\n"
84+
]
85+
},
86+
{
87+
"cell_type": "code",
88+
"execution_count": 17,
89+
"metadata": {},
90+
"outputs": [],
91+
"source": [
92+
"\n",
93+
"\n",
94+
"# This query returns all incoming edges to all HP terms\n",
95+
"# Note that the incoming edges may target either the HP terms or their equivalent MP terms,\n",
96+
"# and there is currently no way to differentiate between the two!\n",
97+
"# We will probably have to build two different versions of the Neo4j database: one with merged mappings and one without\n",
98+
"\n",
99+
"df = DataFrame(graph.run(\"\"\"\n",
100+
"MATCH (id:Id { id:\"hp:0000001\"})<-[:id]-(hpo_root_term:GraphNode)\n",
101+
" <-[:`biolink:broad_match`]-(term:GraphNode)\n",
102+
" <-[incoming_edge]-(n:GraphNode)\n",
103+
"RETURN\n",
104+
" [id in term.id WHERE id =~ \"hp:[0-9]*\" | id][0] AS to_id,\n",
105+
" term.`grebi:name`[0] AS to_label,\n",
106+
" type(incoming_edge) AS edge,\n",
107+
" n.id AS from_ids,\n",
108+
" n.`grebi:name`[0] AS from_label\n",
109+
"\"\"\").data())\n",
110+
"\n",
111+
"df.to_csv(\"all_hp_all_in_hp.csv\", index=False)"
112+
]
113+
}
114+
],
115+
"metadata": {
116+
"kernelspec": {
117+
"display_name": ".venv",
118+
"language": "python",
119+
"name": "python3"
120+
},
121+
"language_info": {
122+
"codemirror_mode": {
123+
"name": "ipython",
124+
"version": 3
125+
},
126+
"file_extension": ".py",
127+
"mimetype": "text/x-python",
128+
"name": "python",
129+
"nbconvert_exporter": "python",
130+
"pygments_lexer": "ipython3",
131+
"version": "3.10.0"
132+
}
133+
},
134+
"nbformat": 4,
135+
"nbformat_minor": 2
136+
}

0 commit comments

Comments
 (0)