2
2
3
3
import pandas as pd
4
4
5
- import reprlib
6
-
7
-
8
5
9
6
class PolygenicScore :
10
7
"""Represents the output of plink2 --score written to a file
@@ -13,7 +10,7 @@ class PolygenicScore:
13
10
>>> score1 = Config.ROOT_DIR / "tests" / "cineca_22_additive_0.sscore.zst"
14
11
>>> pgs1 = PolygenicScore(sampleset="test", path=score1) # doctest: +ELLIPSIS
15
12
>>> pgs1
16
- PolygenicScore(sampleset='test', path=PosixPath('.../cineca_22_additive_0.sscore.zst'), df=None )
13
+ PolygenicScore(sampleset='test', path=PosixPath('.../cineca_22_additive_0.sscore.zst'))
17
14
>>> pgs2 = PolygenicScore(sampleset="test", path=score1)
18
15
>>> pgs1.read().to_dict() # doctest: +ELLIPSIS
19
16
{'DENOM': ...}, 'PGS001229_22_SUM': {('test', 'HG00096'): 0.54502, ('test', 'HG00097'): 0.674401, ('test', 'HG00099'): 0.63727, ('test', 'HG00100'): 0.863944, ...}}
@@ -22,7 +19,7 @@ class PolygenicScore:
22
19
23
20
>>> aggregated_score = pgs1 + pgs2
24
21
>>> aggregated_score # doctest: +ELLIPSIS
25
- PolygenicScore(sampleset='test', path=None, df={'DENOM': ...}, 'PGS001229_22_SUM': {('test', 'HG00096'): 1.09004, ('test', 'HG00097'): 1.348802, ('test', 'HG00099'): 1.27454, ('test', 'HG00100'): 1.727888, ...}} )
22
+ PolygenicScore(sampleset='test', path=None)
26
23
27
24
Once a score has been fully aggregated it can be helpful to recalculate an average:
28
25
@@ -45,7 +42,7 @@ class PolygenicScore:
45
42
['test_pgs.txt.gz']
46
43
"""
47
44
48
- def __init__ (self , * , sampleset , path = None , df = None ):
45
+ def __init__ (self , * , path = None , df = None , sampleset = None ):
49
46
match (path , df ):
50
47
case (None , None ):
51
48
raise ValueError ("init with path or df" )
@@ -164,6 +161,7 @@ def _select_agg_cols(cols):
164
161
if (x .endswith ("_SUM" ) and (x != "NAMED_ALLELE_DOSAGE_SUM" )) or (x in keep_cols )
165
162
]
166
163
164
+
167
165
def _melt (df , value_name ):
168
166
"""Melt the score dataframe from wide format to long format"""
169
167
df = df .melt (
@@ -175,4 +173,3 @@ def _melt(df, value_name):
175
173
# e.g. PGS000822_SUM -> PGS000822
176
174
df ["PGS" ] = df ["PGS" ].str .replace (f"_{ value_name } " , "" )
177
175
return df
178
-
0 commit comments