|
| 1 | +# -*- coding: utf-8 -*- |
| 2 | +""" |
| 3 | +Created on Mon Oct 7 18:41:14 2019 |
| 4 | +
|
| 5 | +@author: obazgir |
| 6 | +""" |
| 7 | + |
1 | 8 | import numpy as np
|
2 | 9 | import pandas as pd
|
3 | 10 | import os
|
| 11 | +import matplotlib.pyplot as plt |
| 12 | +from sklearn.model_selection import train_test_split |
| 13 | +from fancyimpute import KNN |
4 | 14 | from scipy.stats import pearsonr
|
| 15 | +from sklearn.linear_model import LinearRegression |
| 16 | + |
| 17 | + |
| 18 | + |
| 19 | + |
| 20 | +#%% NRMSE |
def NRMSE(Y_Target, Y_Predict):
    """Return the normalized root-mean-square error and the matching R^2.

    Both inputs are converted to NumPy arrays and reshaped to column
    vectors of shape ``(len, 1)``, so a 1-D vector and an ``(n, 1)`` column
    are compared element by element instead of being broadcast against
    each other.

    Parameters
    ----------
    Y_Target : array-like
        Reference values.
    Y_Predict : array-like
        Predicted values.

    Returns
    -------
    tuple
        ``(nrmse, r2)`` where ``nrmse = sqrt(SSE / SST)``,
        ``SSE = sum((Y_Predict - Y_Target)**2)``,
        ``SST = sum((mean(Y_Target) - Y_Target)**2)`` and
        ``r2 = 1 - nrmse**2``.

    Notes
    -----
    When every target value is identical ``SST`` is zero and the division
    follows NumPy floating-point rules (``inf``/``nan`` with a
    RuntimeWarning); no special handling is applied.
    """
    target = np.array(Y_Target)
    predict = np.array(Y_Predict)
    target = target.reshape(len(target), 1)
    predict = predict.reshape(len(predict), 1)

    sse = np.sum((predict - target) ** 2)
    sst = np.sum((np.mean(target) - target) ** 2)

    # Lower-case local name so the function itself is not shadowed.
    nrmse = np.sqrt(sse / sst)
    r2 = 1 - nrmse ** 2
    return nrmse, r2
| 29 | + |
def NMAE(Y_Target, Y_Predict):
    """Return the mean absolute error normalized by the target's spread.

    Both inputs are converted to NumPy arrays and reshaped to ``(len, 1)``
    column vectors. The result is
    ``mean(|Y_Predict - Y_Target|) / mean(|mean(Y_Target) - Y_Target|)``.

    Parameters
    ----------
    Y_Target : array-like
        Reference values.
    Y_Predict : array-like
        Predicted values.

    Returns
    -------
    float
        The normalized mean absolute error.
    """
    target = np.array(Y_Target)
    predict = np.array(Y_Predict)
    target = target.reshape(len(target), 1)
    predict = predict.reshape(len(predict), 1)

    abs_error = np.abs(predict - target)
    # Absolute deviation of each target value from the target mean.
    abs_spread = np.abs(np.mean(target) - target)
    return np.mean(abs_error) / np.mean(abs_spread)
| 37 | + |
| 38 | + |
| 39 | +#%% Random position generation |
| 40 | +import math |
def Random_position(p):
    """Return a random ``[row, col]`` grid position for each of ``p`` items.

    The integers ``0 .. p-1`` are shuffled in place with
    ``np.random.shuffle`` (so the result depends on NumPy's global random
    state). Each shuffled value ``k`` is mapped to
    ``[int(k / side), int(k % side)]`` with ``side = int(sqrt(p)) + 1``.

    Parameters
    ----------
    p : int
        Number of positions to generate.

    Returns
    -------
    list
        ``p`` two-element lists of Python ints, in shuffled order.
    """
    side = int(math.sqrt(p)) + 1
    order = np.arange(p)
    np.random.shuffle(order)
    return [[int(k / side), int(k % side)] for k in order]
| 50 | + |
def Random_Image_Gen(X, Rand_Pos_mat):
    """Scatter each column of ``X`` into a square grid and flatten per row.

    With ``side = int(sqrt(n_columns)) + 1``, column ``i`` of ``X`` is
    written to grid cell ``(Rand_Pos_mat[i][0], Rand_Pos_mat[i][1])`` of a
    ``side x side`` grid for every row of ``X``. Cells that receive no
    column stay zero. Each grid is flattened in row-major order.

    Parameters
    ----------
    X : array-like, 2-D
        One row per sample, one column per feature.
    Rand_Pos_mat : sequence
        ``Rand_Pos_mat[i]`` holds the ``(row, col)`` cell for column ``i``.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_rows, side**2)``.
    """
    X = np.asarray(X)
    n_samples, n_features = X.shape[0], X.shape[1]
    side = int(math.sqrt(n_features)) + 1

    images = np.zeros((n_samples, side, side))
    # One write per feature covers all samples at once. Features are still
    # visited in order, so if two features share a cell the later one wins,
    # exactly as in a per-sample scatter.
    for feat in range(n_features):
        pos = Rand_Pos_mat[feat]
        images[:, pos[0], pos[1]] = X[:, feat]

    return images.reshape(n_samples, side ** 2)
| 64 | + |
| 65 | +#%% MDS by Ruibo |
5 | 66 | from sklearn.manifold import MDS
|
6 | 67 |
|
7 | 68 | def two_d_norm(xy):
|
@@ -101,3 +162,101 @@ def Assign_features_to_pixels(xy,nn,verbose = False):
|
101 | 162 | #eq_xy = two_d_eq(mds_xy)
|
102 | 163 | #Img = Assign_features_to_pixels(eq_xy,nn,verbose=1)
|
103 | 164 | #Init_Corr_MDS = InitCorr(dist_mat,Img,nn)
|
| 165 | + |
def MDS_Im_Gen(X, nn, Img):
    """Rearrange the columns of ``X`` according to the labels in ``Img``.

    ``Img`` is flattened in row-major order. Each of its first ``nn**2``
    entries is a string whose first character is skipped and whose
    remainder is parsed with ``int`` to give a column number of ``X``.
    Output column ``i`` is a copy of that column of ``X``. It is left at
    zero when the parsed number does not satisfy ``abs(number) < nn**2``
    or does not address a column of ``X``.

    Parameters
    ----------
    X : numpy.ndarray, 2-D
        One row per sample, one column per feature.
    nn : int
        Side length; the output has ``nn**2`` columns.
    Img : numpy.ndarray
        Array of label strings with at least ``nn**2`` entries.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_rows, nn**2)``.
    """
    N_sam, P_Feat = X.shape
    n_pixels = nn ** 2
    X_Gen = np.zeros((N_sam, n_pixels))
    labels = Img.reshape(Img.size)

    for pixel in range(n_pixels):
        F_Num = int(labels[pixel][1:])
        # The nn**2 bound alone lets through numbers X has no column for
        # (whenever X has fewer than nn**2 columns); those now stay zero
        # instead of raising IndexError.
        # NOTE(review): small negative numbers still index X from the end,
        # as they always did - confirm whether that is intended.
        if abs(F_Num) < n_pixels and -P_Feat <= F_Num < P_Feat:
            X_Gen[:, pixel] = X[:, F_Num]

    return X_Gen
| 177 | + |
| 178 | +#%% CCLE functions |
def dataframer(Main, Set_in, name_in, name_out):
    """Select rows of ``Main`` whose ``name_out`` matches values of ``Set_in``.

    For every value in ``Set_in[name_in]`` (in order, duplicates included)
    the rows of ``Main`` where ``Main[name_out]`` equals that value are
    collected, and the collected pieces are concatenated. Rows keep their
    original index labels.

    Parameters
    ----------
    Main : pandas.DataFrame
        Table to select rows from.
    Set_in : pandas.DataFrame
        Table providing the values to look up.
    name_in : str
        Column of ``Set_in`` holding the lookup values.
    name_out : str
        Column of ``Main`` compared against each lookup value.

    Returns
    -------
    pandas.DataFrame
        The matching rows, grouped by lookup value in lookup order. Empty
        (with the columns of ``Main``) when there are no lookup values.
    """
    wanted = Set_in[name_in].tolist()
    if not wanted:
        # Nothing to look up: return an empty frame instead of failing.
        return Main.iloc[0:0]

    key_column = Main[name_out]
    pieces = [Main[key_column == value] for value in wanted]
    # A single concat avoids re-copying the growing result once per value.
    return pd.concat(pieces)
| 186 | + |
def Reg_to_Class(Y, Threshold):
    """Binarize ``Y``: 1 where ``Y > Threshold``, otherwise 0.

    Parameters
    ----------
    Y : array-like
        Values to threshold. Lists, NumPy arrays and pandas Series are
        accepted.
    Threshold : scalar
        Values strictly greater than this become 1.

    Returns
    -------
    numpy.ndarray
        Integer array with the same shape as ``Y``.
    """
    values = np.asarray(Y)
    # A direct comparison keeps the integer dtype even for empty input;
    # a list round trip would turn an empty result into float64.
    return (values > Threshold).astype(int)
| 195 | + |
def floattoint(Y_Test_Encoded):
    """Binarize an array at 0.5: 1 where the value is ``> 0.5``, else 0.

    Parameters
    ----------
    Y_Test_Encoded : array-like
        Values to threshold; any shape.

    Returns
    -------
    numpy.ndarray
        Integer array with the same shape as the input.
    """
    # A direct comparison preserves shape and integer dtype for every
    # input, including empty arrays, which a list round trip would collapse
    # to a float64 array of shape (0,).
    return (np.asarray(Y_Test_Encoded) > 0.5).astype(int)
def REFINED_Im_Gen(X, nn, map_in_int, gene_names, coords):
    """Place each column of ``X`` at its grid coordinate and flatten per row.

    For every row of ``X`` a zero grid with the shape of ``map_in_int`` is
    filled so that cell ``(coords[j, 0], coords[j, 1])`` holds the value of
    column ``j``. Each grid is then flattened to length ``nn**2``.

    Parameters
    ----------
    X : array-like, 2-D
        One row per sample, one column per feature.
    nn : int
        Side length; each flattened grid has ``nn**2`` entries.
    map_in_int : numpy.ndarray
        Only its ``shape`` is used, as the shape of each grid.
    gene_names : sequence
        One name per column of ``X``. Only its length is checked here,
        since the name at position ``j`` refers to column ``j``.
    coords : array-like of int, shape ``(n, 2)``
        Row ``j`` gives the grid cell for column ``j``.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_rows, nn**2)``.

    Raises
    ------
    ValueError
        If ``gene_names`` does not have one entry per column of ``X``.
    """
    X = np.asarray(X)
    N_sam, P_Feat = X.shape
    if len(gene_names) != P_Feat:
        raise ValueError(
            "gene_names has %d entries but X has %d columns"
            % (len(gene_names), P_Feat)
        )

    coords = np.asarray(coords)
    images = np.zeros((N_sam,) + tuple(map_in_int.shape))
    # Write column j for all samples at once; this avoids building a
    # one-row DataFrame per sample just to read column j back by name.
    for j in range(len(coords)):
        images[:, coords[j, 0], coords[j, 1]] = X[:, j]

    return images.reshape(N_sam, nn ** 2)
| 217 | +#%% GDSC |
def GDSC_dataframer(PD_Set, Set_Name, PD_Attribute, Attribute_Name):
    """Look up one attribute row per entry of ``PD_Set[Set_Name]``.

    For each value in ``PD_Set[Set_Name]`` (in order, duplicates included)
    the first row of ``PD_Attribute`` whose ``Attribute_Name`` column equals
    that value is taken, and every column after the first is converted to
    float.

    Parameters
    ----------
    PD_Set : pandas.DataFrame
        Table providing the lookup values.
    Set_Name : str
        Column of ``PD_Set`` holding the lookup values.
    PD_Attribute : pandas.DataFrame
        Attribute table. Its first column is dropped from the output; the
        remaining columns must be convertible to float.
    Attribute_Name : str
        Column of ``PD_Attribute`` compared against each lookup value.

    Returns
    -------
    tuple
        ``(Data_arry, PD_Data_arry)``: a float array of shape
        ``(n_values, n_columns - 1)`` and the same data as a DataFrame,
        indexed by the lookup values and labelled with the attribute
        table's columns after the first.

    Raises
    ------
    IndexError
        If a lookup value has no matching row in ``PD_Attribute``.
    """
    names = PD_Set[Set_Name].tolist()
    key_column = PD_Attribute[Attribute_Name]
    feature_columns = PD_Attribute.columns.tolist()[1:]

    # Collect the rows first and stack once; appending to a growing array
    # would copy everything gathered so far on every iteration.
    rows = [
        np.asarray(PD_Attribute[key_column == name].values[0, 1:], dtype=float)
        for name in names
    ]
    if rows:
        Data_arry = np.vstack(rows)
    else:
        # No lookup values: return correctly shaped empty results.
        Data_arry = np.empty((0, len(feature_columns)))

    PD_Data_arry = pd.DataFrame(data=Data_arry, columns=feature_columns, index=names)
    return Data_arry, PD_Data_arry
| 231 | + |
def GDSC_NPier(PD_Set, Set_Name, PD_Attribute, Attribute_Name):
    """Build a float matrix with one attribute row per row of ``PD_Set``.

    Every distinct value of ``PD_Set[Set_Name]`` is looked up once in
    ``PD_Attribute[Attribute_Name]``. The first matching attribute row,
    minus its first column, is copied into every output row whose
    ``Set_Name`` value equals it.

    Parameters
    ----------
    PD_Set : pandas.DataFrame
        Table providing the lookup values. Its index is reset first, so
        ``Set_Name`` may also refer to a named index level.
    Set_Name : str
        Column of ``PD_Set`` holding the lookup values.
    PD_Attribute : pandas.DataFrame
        Attribute table. Its first column is dropped from the output; the
        remaining columns must be convertible to float.
    Attribute_Name : str
        Column of ``PD_Attribute`` compared against each lookup value.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(PD_Set), n_attribute_columns - 1)``.

    Raises
    ------
    IndexError
        If a lookup value has no matching row in ``PD_Attribute``.
    """
    PD_Set = PD_Set.reset_index()
    names = PD_Set[Set_Name]
    key_column = PD_Attribute[Attribute_Name]

    X_NP = np.zeros((PD_Set.shape[0], PD_Attribute.shape[1] - 1))
    for name in names.unique():
        features = np.array(
            PD_Attribute[key_column == name].values[0, 1:], dtype=float
        )
        # The boolean mask picks every row of PD_Set carrying this name.
        X_NP[(names == name).values, :] = features
    return X_NP
| 243 | + |
| 244 | + |
def Coord_Converter(coords_drug2, nn):
    """Turn the top-left ``nn x nn`` cells of a grid into ``'F<value>'`` labels.

    Parameters
    ----------
    coords_drug2 : numpy.ndarray
        2-D array indexed as ``coords_drug2[i, j]``.
    nn : int
        Number of rows and columns to convert.

    Returns
    -------
    numpy.ndarray
        Object array of shape ``(nn, nn)`` whose cell ``(i, j)`` is the
        string ``'F' + str(coords_drug2[i, j])``.
    """
    labelled = np.empty((nn, nn), dtype=object)
    # np.ndindex walks the cells in row-major order.
    for row, col in np.ndindex(nn, nn):
        labelled[row, col] = 'F' + str(coords_drug2[row, col])
    return labelled
| 252 | + |
def Bias_Calc(Y_Test, Y_Pred):
    """Return the slope of the prediction error regressed on the target.

    The error ``Y_Test - Y_Pred`` is fitted against ``Y_Test`` with
    ``LinearRegression`` and the first row of the fitted ``coef_`` is
    returned.

    Parameters
    ----------
    Y_Test : array-like
        Target values, either 1-D or a single column.
    Y_Pred : array-like
        Predicted values, either 1-D or a single column.

    Returns
    -------
    numpy.ndarray
        ``reg.coef_[0]`` of the fitted regression.

    Raises
    ------
    ValueError
        If an input cannot be reshaped to a single column, or the two
        inputs differ in length.
    """
    y_true = np.asarray(Y_Test)
    y_pred = np.asarray(Y_Pred)
    # Reshape BEFORE subtracting: an (n, 1) target minus an (n,) prediction
    # would otherwise broadcast to an (n, n) matrix.
    y_true = y_true.reshape(len(y_true), 1)
    y_pred = y_pred.reshape(len(y_pred), 1)
    if y_true.shape != y_pred.shape:
        raise ValueError(
            "Y_Test and Y_Pred must have the same length, got %d and %d"
            % (len(y_true), len(y_pred))
        )

    error = y_true - y_pred
    reg = LinearRegression().fit(y_true, error)
    return reg.coef_[0]
| 262 | + |
0 commit comments