Skip to content

Commit adac2c0

Browse files
Add files via upload
1 parent a72c760 commit adac2c0

File tree

1 file changed

+43
-0
lines changed

1 file changed

+43
-0
lines changed

Initial_MDS.py

+43
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,43 @@
1+
import pickle
2+
import numpy as np
3+
import pandas as pd
4+
import cv2
5+
import matplotlib.pyplot as plt
6+
import scipy.misc
7+
import Toolbox
8+
from Toolbox import two_d_eq, Assign_features_to_pixels
9+
from sklearn.manifold import MDS
10+
from sklearn.metrics.pairwise import euclidean_distances
11+
import math
12+
13+
#%% Loading the data
# Read the descriptor table. Judging by the slicing below, the first two
# columns are treated as non-feature columns and every remaining column is
# one feature (descriptor).
Feat_DF = pd.read_csv("...\\normalized_padel_feats_NCI60_672_small.csv")

X = Feat_DF.values
X = X[:, 2:]  # Drop the two leading non-feature columns
original_input = pd.DataFrame(data=X)  # The MDS input should be in a dataframe format
# The frame above is rebuilt from a bare array, so these are positional column
# labels (0, 1, 2, ...), not descriptor names; only their count is used below.
# The real descriptor names are taken from Feat_DF in the last cell.
feature_names_list = original_input.columns.tolist()
print(">>>> Data is loaded")

#%% MDS
nn = math.ceil(np.sqrt(len(feature_names_list)))  # Image dimension (side of the smallest square with >= Nn cells)
Nn = original_input.shape[1]  # Number of features

# The input is transposed because each *feature* is to be summarized by two
# values (as compared to regular dimensionality reduction, where each *sample*
# is described by two values).
transposed_input = original_input.T
Euc_Dist = euclidean_distances(transposed_input)  # Feature-to-feature Euclidean distances
# Element-wise max with the transpose guarantees an exactly symmetric matrix.
Euc_Dist = np.maximum(Euc_Dist, Euc_Dist.transpose())

# NOTE(review): no random_state is passed, so the layout may differ between
# runs; pass one to MDS if the initial image must be reproducible.
embedding = MDS(n_components=2)  # Reduce the dimensionality by MDS into 2 components
# NOTE(review): MDS is fitted on the feature matrix itself, not on Euc_Dist;
# Euc_Dist is only used for the distance table saved below.
mds_xy = embedding.fit_transform(transposed_input)  # Apply MDS

print(">>>> MDS dimensionality reduction is done")

# Toolbox helpers (defined outside this file). Per the original author's note,
# Img holds the non-overlapping pixel coordinates derived from the MDS layout.
eq_xy = two_d_eq(mds_xy, Nn)
Img = Assign_features_to_pixels(eq_xy, nn, verbose=1)

#%% To be saved for hill climbing
Desc = Feat_DF.columns.tolist()
Desc = Desc[2:]  # Drug descriptor names (same two leading columns skipped as for X)
# Distance matrix labelled by descriptor name on both axes: the Euclidean
# distance between each and every pair of descriptors.
Dist = pd.DataFrame(data=Euc_Dist, columns=Desc, index=Desc)
data = (Desc, Dist, Img)  # Preparing the hill climbing inputs

# The hill climbing step loads a pickle, so everything is saved as one pickle.
with open("...\\Init_MDS_Euc.pickle", 'wb') as f:
    pickle.dump(data, f)

0 commit comments

Comments
 (0)