Skip to content

Commit 487e297

Browse files
Add files via upload
1 parent 75c5d37 commit 487e297

File tree

1 file changed

+159
-0
lines changed

1 file changed

+159
-0
lines changed

Toolbox.py

+159
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,68 @@
1+
# -*- coding: utf-8 -*-
2+
"""
3+
Created on Mon Oct 7 18:41:14 2019
4+
5+
@author: obazgir
6+
"""
7+
18
import numpy as np
29
import pandas as pd
310
import os
11+
import matplotlib.pyplot as plt
12+
from sklearn.model_selection import train_test_split
13+
from fancyimpute import KNN
414
from scipy.stats import pearsonr
15+
from sklearn.linear_model import LinearRegression
16+
17+
18+
19+
20+
#%% NRMSE
21+
def NRMSE(Y_Target, Y_Predict):
    """Return the normalized root-mean-square error and the matching R^2.

    The error is normalized by the spread of the targets around their mean::

        NRMSE = sqrt(sum((pred - target)**2) / sum((mean(target) - target)**2))
        R2    = 1 - NRMSE**2

    Parameters
    ----------
    Y_Target : array-like
        Observed values; reshaped to a column of ``len(Y_Target)`` rows.
    Y_Predict : array-like
        Predicted values; reshaped to a column of ``len(Y_Predict)`` rows.

    Returns
    -------
    tuple
        ``(nrmse, r2)`` as NumPy floats.

    Notes
    -----
    When every target equals the mean the denominator is zero, so NumPy
    yields ``inf`` or ``nan`` (with a RuntimeWarning) rather than raising.
    """
    # Force both inputs into (n, 1) columns so that mixing a flat vector with
    # a column vector cannot broadcast into an (n, n) difference matrix.
    target = np.array(Y_Target)
    predicted = np.array(Y_Predict)
    target = target.reshape(len(target), 1)
    predicted = predicted.reshape(len(predicted), 1)

    residual_ss = np.sum((predicted - target) ** 2)
    total_ss = np.sum((np.mean(target) - target) ** 2)

    nrmse = np.sqrt(residual_ss / total_ss)
    r2 = 1 - nrmse ** 2
    return nrmse, r2
29+
30+
def NMAE(Y_Target, Y_Predict):
    """Return the mean absolute error normalized by the mean absolute deviation.

    Both inputs are converted to NumPy arrays and reshaped to columns with
    one row per element of the leading axis. The result is
    ``mean(|pred - target|) / mean(|mean(target) - target|)``.
    """
    truth = np.array(Y_Target)
    estimate = np.array(Y_Predict)
    truth = truth.reshape(len(truth), 1)
    estimate = estimate.reshape(len(estimate), 1)

    # Numerator: average absolute prediction error.
    mean_abs_error = np.mean(np.abs(estimate - truth))
    # Denominator: average absolute deviation of the targets from their mean.
    mean_abs_spread = np.mean(np.abs(np.mean(truth) - truth))
    return mean_abs_error / mean_abs_spread
37+
38+
39+
#%% Random position generation
40+
import math
41+
def Random_position(p):
    """Assign each of ``p`` features a random, distinct cell on a square grid.

    The grid side is ``int(sqrt(p)) + 1``. The integers ``0 .. p-1`` are
    shuffled with NumPy's global random state, and each shuffled value ``v``
    is mapped to the cell ``[v // side, v % side]``.

    Returns a list of ``p`` two-element ``[row, col]`` lists of Python ints.
    """
    side = int(math.sqrt(p)) + 1
    shuffled = np.arange(p)
    np.random.shuffle(shuffled)
    return [
        [int(shuffled[k] / side), int(shuffled[k] % side)]
        for k in range(p)
    ]
50+
51+
def Random_Image_Gen(X, Rand_Pos_mat):
    """Scatter each row of ``X`` onto a square grid and return the flat images.

    ``X`` is indexed as ``X[sample, feature]``. The grid side is
    ``int(sqrt(n_features)) + 1``. Feature ``i`` of every sample is written to
    the cell ``Rand_Pos_mat[i]`` (a ``[row, col]`` pair). Each filled grid is
    flattened row-major into one row of the ``(n_samples, side**2)`` result.
    """
    n_samples, n_features = X.shape[0], X.shape[1]
    side = int(math.sqrt(n_features)) + 1

    # A single canvas is reused for every sample; cells not named in
    # Rand_Pos_mat are never written and therefore stay at zero.
    canvas = np.zeros((side, side))
    X_Gen = np.zeros((n_samples, side ** 2))

    for sample in range(n_samples):
        for feature in range(n_features):
            cell = Rand_Pos_mat[feature]
            canvas[cell[0], cell[1]] = X[sample, feature]
        X_Gen[sample, :] = canvas.reshape(1, side ** 2)
    return X_Gen
64+
65+
#%% MDS by Ruibo
566
from sklearn.manifold import MDS
667

768
def two_d_norm(xy):
@@ -101,3 +162,101 @@ def Assign_features_to_pixels(xy,nn,verbose = False):
101162
#eq_xy = two_d_eq(mds_xy)
102163
#Img = Assign_features_to_pixels(eq_xy,nn,verbose=1)
103164
#Init_Corr_MDS = InitCorr(dist_mat,Img,nn)
165+
166+
def MDS_Im_Gen(X,nn, Img):
    """Rearrange the columns of ``X`` according to a grid of feature labels.

    ``Img`` is flattened and its first ``nn**2`` entries are read as labels
    whose text after the first character parses as an integer (for example
    ``'F12'`` -> 12). Output column ``i`` is a copy of ``X[:, number]`` when
    ``abs(number) < nn**2`` and is left at zero otherwise.

    Returns an array of shape ``(X.shape[0], nn**2)``.
    """
    n_samples, _ = X.shape  # unpacking requires X to be two-dimensional
    n_pixels = nn ** 2
    X_Gen = np.zeros((n_samples, n_pixels))
    labels = Img.reshape(Img.size, 1)

    for pixel in range(n_pixels):
        label = np.array(labels[pixel])[0]
        feature_index = int(label[1:])
        # NOTE(review): the bound is the pixel count, not the number of
        # columns in X, and a negative index passes the abs() test and then
        # selects from the end of X -- confirm this is intended.
        if abs(feature_index) >= n_pixels:
            X_Gen[:, pixel] = 0
            continue
        X_Gen[:, pixel] = X[:, feature_index]
    return X_Gen
177+
178+
#%% CCLE functions
179+
def dataframer(Main,Set_in, name_in, name_out):
    """Collect the rows of ``Main`` that match each value listed in ``Set_in``.

    For every value of ``Set_in[name_in]``, in order, the rows of ``Main``
    whose ``name_out`` column equals that value are selected. The selections
    are stacked in that order, keeping the original index labels. A value
    listed twice contributes its rows twice.

    Parameters
    ----------
    Main : pandas.DataFrame
        Table to select rows from.
    Set_in : pandas.DataFrame
        Table whose ``name_in`` column lists the values to look up.
    name_in : str
        Column of ``Set_in`` holding the lookup values.
    name_out : str
        Column of ``Main`` compared against each lookup value.

    Returns
    -------
    pandas.DataFrame
        The stacked selections; an empty frame with the columns of ``Main``
        when ``Set_in[name_in]`` has no entries.
    """
    wanted = Set_in[name_in].tolist()
    if not wanted:
        # Nothing to look up: return an empty frame rather than failing on
        # the first element.
        return Main.iloc[0:0]

    keys = Main[name_out]
    # Concatenate once instead of growing the frame inside the loop, which
    # re-copies all previously gathered rows on every iteration.
    return pd.concat([Main[keys == value] for value in wanted])
186+
187+
def Reg_to_Class(Y,Threshold):
    """Binarize ``Y``: 1 where ``Y > Threshold``, 0 elsewhere.

    Returns a one-dimensional integer array with ``len(Y)`` entries.
    """
    labels = np.zeros(len(Y))
    labels[np.where(Y > Threshold)] = 1
    # The list round-trip is kept so the returned array is built exactly as
    # before (a fresh array constructed from Python ints).
    return np.array(labels.astype(int).tolist())
195+
196+
def floattoint(Y_Test_Encoded):
    """Threshold an array at 0.5 and return integer 0/1 labels.

    Entries strictly greater than 0.5 become 1; all others become 0. The
    working array has the same shape as ``Y_Test_Encoded``.
    """
    binary = np.zeros(Y_Test_Encoded.shape)
    binary[np.where(Y_Test_Encoded > 0.5)] = 1
    # The list round-trip is kept so the returned array is built exactly as
    # before (a fresh array constructed from Python ints).
    return np.array(binary.astype(int).tolist())
204+
def REFINED_Im_Gen(X,nn, map_in_int, gene_names,coords):
    """Place every feature of ``X`` at its pixel and return the flat images.

    Feature ``j`` (column ``j`` of ``X``, named ``gene_names[j]``) is written
    to pixel ``(coords[j, 0], coords[j, 1]`` of an image shaped like
    ``map_in_int``. Pixels not listed in ``coords`` stay at zero. Each image
    is flattened to ``nn**2`` values.

    Parameters
    ----------
    X : numpy.ndarray
        Two-dimensional array indexed as ``X[sample, feature]``.
    nn : int
        Image side; ``map_in_int`` must hold ``nn**2`` cells.
    map_in_int : numpy.ndarray
        Only its ``shape`` is used, as the shape of each image.
    gene_names : sequence
        One name per column of ``X``. Features are matched to coordinates by
        position, so only the length is checked.
    coords : numpy.ndarray
        Integer array indexed as ``coords[j, 0]`` (row) and ``coords[j, 1]``
        (column).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_samples, nn**2)``.

    Raises
    ------
    ValueError
        If ``gene_names`` does not have one entry per column of ``X``.
    """
    n_samples, n_features = X.shape
    if len(gene_names) != n_features:
        raise ValueError(
            "gene_names must have one entry per column of X "
            f"(got {len(gene_names)} names for {n_features} columns)"
        )

    images = np.zeros((n_samples,) + tuple(map_in_int.shape))
    # Fill one pixel for all samples at a time. Iterating over coords in order
    # keeps the "last write wins" result if a pixel is listed more than once,
    # and avoids building a one-row DataFrame per sample.
    for j in range(len(coords)):
        images[:, coords[j, 0], coords[j, 1]] = X[:, j]
    return images.reshape(n_samples, nn ** 2)
217+
#%% GDSC
218+
def GDSC_dataframer(PD_Set, Set_Name,PD_Attribute,Attribute_Name):
    """Build an attribute matrix with one row per entry of ``PD_Set[Set_Name]``.

    For each name, in order, the first row of ``PD_Attribute`` whose
    ``Attribute_Name`` column equals that name is taken. Every column after
    the first is converted to float and becomes one row of the result.

    Parameters
    ----------
    PD_Set : pandas.DataFrame
        Table whose ``Set_Name`` column lists the names to look up.
    Set_Name : str
        Column of ``PD_Set`` holding the names.
    PD_Attribute : pandas.DataFrame
        Attribute table; all columns after the first are treated as numeric.
    Attribute_Name : str
        Column of ``PD_Attribute`` compared against each name.

    Returns
    -------
    tuple
        ``(Data_arry, PD_Data_arry)``: the float matrix, and the same data as
        a DataFrame indexed by the names with the attribute column labels.

    Raises
    ------
    IndexError
        If a name has no matching row in ``PD_Attribute``.
    """
    names = PD_Set[Set_Name].tolist()
    value_columns = PD_Attribute.columns.tolist()[1:]
    keys = PD_Attribute[Attribute_Name]

    # Collect all rows first and stack once; appending to an array inside the
    # loop re-copies everything gathered so far on every iteration.
    rows = [
        np.array(PD_Attribute[keys == name].values[0, 1:], dtype=float)
        for name in names
    ]
    # The explicit reshape also gives a well-formed (0, n_columns) matrix
    # when there are no names to look up.
    Data_arry = np.array(rows, dtype=float).reshape(len(names), len(value_columns))

    PD_Data_arry = pd.DataFrame(data=Data_arry, columns=value_columns, index=names)
    return Data_arry, PD_Data_arry
231+
232+
def GDSC_NPier(PD_Set, Set_Name,PD_Attribute,Attribute_Name):
    """Expand per-name attribute rows into a matrix aligned with ``PD_Set``.

    Each distinct value of ``PD_Set[Set_Name]`` is looked up once in
    ``PD_Attribute`` (first row whose ``Attribute_Name`` column matches). The
    columns after the first are converted to float and copied into every
    output row whose ``Set_Name`` equals that value.

    Parameters
    ----------
    PD_Set : pandas.DataFrame
        Table whose rows define the output rows; its index is reset first so
        row positions run ``0 .. n-1``.
    Set_Name : str
        Column of ``PD_Set`` (after the index reset) holding the names.
    PD_Attribute : pandas.DataFrame
        Attribute table; all columns after the first are treated as numeric.
    Attribute_Name : str
        Column of ``PD_Attribute`` compared against each name.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(PD_Set), PD_Attribute.shape[1] - 1)``.

    Raises
    ------
    IndexError
        If a name has no matching row in ``PD_Attribute``.
    """
    PD_Set = PD_Set.reset_index()
    X_NP = np.zeros((PD_Set.shape[0], PD_Attribute.shape[1] - 1))

    # Hoisted out of the loop: neither column changes between iterations.
    set_names = PD_Set[Set_Name]
    attribute_keys = PD_Attribute[Attribute_Name]

    for name in set(set_names.tolist()):
        row_positions = PD_Set.index[set_names == name].tolist()
        attributes = np.array(
            PD_Attribute[attribute_keys == name].values[0, 1:], dtype=float
        )
        # One attribute vector is broadcast to every row carrying this name.
        X_NP[row_positions, :] = attributes
    return X_NP
243+
244+
245+
def Coord_Converter(coords_drug2,nn):
    """Turn an ``nn`` x ``nn`` grid of values into ``'F<value>'`` labels.

    Returns an object-dtype array of shape ``(nn, nn)`` whose cell ``(i, j)``
    is the string ``'F'`` followed by ``str(coords_drug2[i, j])``.
    """
    labelled = np.full((nn, nn), 'NaN').astype(object)
    for row in range(nn):
        for col in range(nn):
            labelled[row, col] = 'F' + str(coords_drug2[row, col])
    return labelled
252+
253+
def Bias_Calc(Y_Test, Y_Pred):
    """Return the slope of the prediction error regressed on the true values.

    The error ``Y_Test - Y_Pred`` is fitted against ``Y_Test`` with an
    ordinary least-squares ``LinearRegression``, and the first row of the
    fitted ``coef_`` is returned.

    Parameters
    ----------
    Y_Test : array-like
        Observed values; reshaped to a column of ``len(Y_Test)`` rows.
    Y_Pred : array-like
        Predicted values; reshaped to a column of ``len(Y_Pred)`` rows.

    Returns
    -------
    numpy.ndarray
        ``reg.coef_[0]`` of the fitted model.
    """
    observed = np.array(Y_Test)
    predicted = np.array(Y_Pred)
    # Make both inputs columns BEFORE subtracting: a flat vector minus a
    # column vector would otherwise broadcast to an (n, n) matrix and the
    # regression target could no longer be shaped as (n, 1).
    observed = observed.reshape(len(observed), 1)
    predicted = predicted.reshape(len(predicted), 1)
    error = observed - predicted

    reg = LinearRegression().fit(observed, error)
    return reg.coef_[0]
262+

0 commit comments

Comments
 (0)