Skip to content

Commit 075e986

Browse files
Add files via upload
1 parent 54e2c34 commit 075e986

File tree

2 files changed

+245
-0
lines changed

2 files changed

+245
-0
lines changed

mpiHill_Hardcoded.py

+113
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
# -*- coding: utf-8 -*-
2+
"""
3+
Created on Sat Aug 10 18:04:36 2019
4+
5+
@authors: Ruibzhan & Omid Bazgir
6+
"""
7+
8+
# This code is written based on using Message Passing Interface (MPI) of python to run the hill climbing section of REFINED on HPCC very efficiently. To run tis code make sure to install mpi4py library of python
9+
# Some functions needed to run this code is written in the paraHill.py file do some specific computation
10+
from mpi4py import MPI
11+
import paraHill
12+
import pickle
13+
import numpy as np
14+
from itertools import product
15+
16+
#%% MPI set up
17+
comm = MPI.COMM_WORLD
18+
my_rank = comm.Get_rank()
19+
n_processors = comm.Get_size()
20+
print("Processors found: ",n_processors)
21+
22+
# Distributing the input data among the processors for parallel processing
23+
def scatter_list_to_processors(comm, data_list, n_processors):
24+
import math
25+
data_amount = len(data_list)
26+
heap_size = math.ceil(data_amount/(n_processors-1))
27+
28+
for pidx in range(1,n_processors):
29+
try:
30+
heap = data_list[heap_size*(pidx-1):heap_size*pidx]
31+
except:
32+
heap = data_list[heap_size*(pidx-1):]
33+
comm.send(heap,dest = pidx)
34+
35+
return True
36+
37+
# Receiving data from each processor and collect them into a vector(tensor)
38+
def receive_from_processors_to_dict(comm, n_processors):
39+
# receives dicts, combine them and return
40+
feedback = dict()
41+
for pidx in range(1,n_processors):
42+
receved = comm.recv(source=pidx)
43+
feedback.update(receved)
44+
return feedback
45+
46+
#%% load data
47+
with open('Init_MDS_Euc.pickle','rb') as file: # Loading the hill climbing input(initial MDS output)
48+
gene_names,dist_matr,init_map = pickle.load(file)
49+
50+
Nn = len(gene_names) # Number of features
51+
52+
NI = 5 # Number of iterations
53+
54+
# Check if the image is not squarred!
55+
if init_map.shape[0] != init_map.shape[1]:
56+
raise ValueError("For now only square images are considered.")
57+
58+
nn = init_map.shape[0] # Squarred output image size
59+
60+
# Converting feature numbers from string to integer for example feature 'F34' will be 34, in the MDS initial map
61+
init_map = np.char.strip(init_map.astype(str),'F').astype(int)
62+
map_in_int = init_map
63+
#%% Hill climbing
64+
Dist_evol = [] # Initializing distance evolution vector as an empty list
65+
if my_rank == 0:
66+
print("Initial distance: >>>",paraHill.universial_corr(dist_matr,map_in_int)) # Printing out difference between the inital distance matrix and the converted feature map
67+
for n_iter in range(NI): # Begin iterating process NI times
68+
# 9 initial coordinates.
69+
init_coords = [x for x in product([0,1,2],repeat = 2)] # Use a 3*3 window to exchange feature location in the feature map
70+
for init_coord in init_coords:
71+
# Update the mapping.
72+
broadcast_msg = map_in_int # Initial map will be broadcasted into all available processors
73+
comm.bcast(broadcast_msg,root = 0)
74+
# generate the centroids
75+
xxx = [init_coord[0]+i*3 for i in range(int(nn/3)+1) if (init_coord[0]+i*3)<nn]
76+
yyy = [init_coord[1]+i*3 for i in range(int(nn/3)+1) if (init_coord[1]+i*3)<nn]
77+
centr_list = [x for x in product(xxx,yyy)]
78+
# Master send and recv
79+
scatter_list_to_processors(comm,centr_list,n_processors) # scatter data
80+
swap_dict = receive_from_processors_to_dict(comm,n_processors) # collect data
81+
print(swap_dict)
82+
map_in_int = paraHill.execute_dict_swap(swap_dict, map_in_int) # Perform feature location exchange using *execute_dict_swap function
83+
84+
print(">",init_coord,"Corr:",paraHill.universial_corr(dist_matr,map_in_int)) # Report the distance
85+
86+
print(">>>",n_iter,"Corr:",paraHill.universial_corr(dist_matr,map_in_int)) # Report the overal distance cost after going over a window
87+
Dist_evol.append(paraHill.universial_corr(dist_matr,map_in_int)) # Calculate the distance evolution in each iteration and append it to the previous one
88+
89+
coords = np.array([[item[0] for item in np.where(map_in_int == ii)] for ii in range(Nn)]) # Generate the final REFINED coordinates
90+
# Save the REFINED coordinates
91+
with open("theMapping.pickle",'wb') as file:
92+
pickle.dump([gene_names,coords,map_in_int],file)
93+
import pandas as pd
94+
pd.Series(Dist_evol).to_csv("Distance_evolution.csv") # Save the distance evolution in a csv file
95+
else:
96+
# other processors
97+
for n_iter in range(NI):
98+
broadcast_msg = init_map # just for a size
99+
100+
# 9 initial Centroids
101+
for ii in range(9):
102+
#Update the mapping
103+
map_in_int = comm.bcast(broadcast_msg,root = 0)
104+
105+
centr_list = comm.recv(source = 0)
106+
each_swap_dict = paraHill.evaluate_centroids_in_list(centr_list,dist_matr,map_in_int)
107+
comm.send(each_swap_dict,dest = 0)
108+
#result = dict()
109+
#for each in data:
110+
# result.update({each: -each})
111+
#comm.send(result,dest = 0)
112+
113+
MPI.Finalize

paraHill.py

+132
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
# -*- coding: utf-8 -*-
2+
"""
3+
Created on Mon Aug 12 14:30:06 2019
4+
5+
@author: Ruibzhan & Omid Bazgir
6+
"""
7+
8+
from scipy.stats import pearsonr
9+
import numpy as np
10+
import random
11+
from scipy.spatial import distance
12+
import pickle
13+
import pandas as pd
14+
import time
15+
from itertools import product
16+
17+
#%%
18+
def universial_corr(dist_matr, mapping_in_int):
19+
# dist_matr is a sqr matr
20+
Nn = dist_matr.shape[0]
21+
# find what is the int coordinates for each feature, get an array
22+
# Because np.where returns a tuple (x_position array, y_position array), a generation is used
23+
coord = np.array([[item[0] for item in np.where(mapping_in_int == ii)] for ii in range(Nn)])
24+
# get a 1-d form distance the euclidean dist between pixles positions
25+
pixel_dist = distance.pdist(coord)
26+
pixel_dist = pixel_dist.reshape(len(pixel_dist),1)
27+
# convert the 2-d distance to 1d distance
28+
feature_dist = distance.squareform(dist_matr)
29+
feature_dist = feature_dist.reshape(len(feature_dist),1)
30+
## pearsonr returns a tuple
31+
#corrs = pearsonr(feature_dist,pixel_dist)[0]
32+
L2_Norm = np.sqrt(sum((pixel_dist - feature_dist)**2)/sum(feature_dist**2))
33+
return L2_Norm
34+
#%%
35+
def evaluate_swap(coord1,coord2,dist_matr,mapping_in_int,original_corr = -2):
36+
# Coord are in list[]
37+
# Avoid messing up with the origianl map
38+
# The correlation before swap can be passed to save some calculation
39+
the_map = mapping_in_int.copy()
40+
# If out of bound, return NaN.
41+
if coord1[0]<0 or coord1[1]<0 or coord2[0]<0 or coord2[1]<0:
42+
return np.nan
43+
if coord1[0]>=the_map.shape[0] or coord1[1]>=the_map.shape[0] or coord2[0]>=the_map.shape[0] or coord2[1]>=the_map.shape[0]:
44+
return np.nan
45+
# If not given, recompute.
46+
if original_corr<-1 or original_corr>1:
47+
original_corr = universial_corr(dist_matr,the_map)
48+
# Swap
49+
try:
50+
temp = the_map[coord1[0],coord1[1]]
51+
the_map[coord1[0],coord1[1]] = the_map[coord2[0],coord2[1]]
52+
the_map[coord2[0],coord2[1]] = temp
53+
changed_corr = universial_corr(dist_matr,the_map)
54+
return(changed_corr - original_corr)
55+
except IndexError:
56+
raise Warning ("Swap index:", coord1,coord2,"Index error. Check the coordnation.")
57+
return np.nan
58+
59+
def evaluate_centroid(centroid,dist_matr,mapping_in_int):
60+
original_corr = universial_corr(dist_matr,mapping_in_int)
61+
results = [100000] # just to skip the 0 position
62+
for each_direc in product([-1,0,1],repeat = 2):
63+
#print(each_direc)
64+
# directions are returned as tuple (-1,1), (-1,0), (-1,1), (0,0), ....
65+
swap_coord = [centroid[0]+each_direc[0],centroid[1]+each_direc[1]]
66+
evaluation = evaluate_swap(centroid,swap_coord,dist_matr,mapping_in_int,original_corr)
67+
results.append(evaluation)
68+
results_array = np.array(results)
69+
#best_swap_direc = np.where(results_array == np.nanmax(results_array))[0][0]
70+
best_swap_direc = np.where(results_array == np.nanmin(results_array))[0][0]
71+
# Give the best direction as a int
72+
return best_swap_direc
73+
74+
def evaluate_centroids_in_list(centroids_list,dist_matr,mapping_in_int):
75+
# and returns a dict
76+
results = dict()
77+
for each_centr in centroids_list:
78+
each_centr = tuple(each_centr)
79+
evaluation = evaluate_centroid(each_centr,dist_matr,mapping_in_int)
80+
results.update({each_centr:evaluation})
81+
return results
82+
83+
#%%
84+
def execute_coordination_swap(coord1,coord2,mapping_in_int):
85+
# try passing the ref. directly
86+
the_map = mapping_in_int#.copy()
87+
# If out of bound, return NaN.
88+
if coord1[0]<0 or coord1[1]<0 or coord2[0]<0 or coord2[1]<0:
89+
raise Warning("Swapping failed:",coord1,coord2,"-- Negative coordnation.")
90+
return the_map
91+
if coord1[0]>the_map.shape[0] or coord1[1]>the_map.shape[0] or coord2[0]>the_map.shape[0] or coord2[1]>the_map.shape[0]:
92+
raise Warning("Swapping failed:",coord1,coord2,"-- Coordnation out of bound.")
93+
return the_map
94+
95+
temp = the_map[coord1[0],coord1[1]]
96+
the_map[coord1[0],coord1[1]] = the_map[coord2[0],coord2[1]]
97+
the_map[coord2[0],coord2[1]] = temp
98+
99+
return(the_map)
100+
101+
# Initial centriod id & Swapping direction:
102+
# 1 2 3
103+
# 4 5 6
104+
# 7 8 9
105+
# 0 in swapping is preserved for the header.
106+
107+
def execute_direction_swap(centroid,mapping_in_int,direction = 5):
108+
# Need to notice that [0] is the vertival coord, [1] is the horiz coord. similar to the matlab images.
109+
coord1 = list(centroid)
110+
coord2 = list(centroid)
111+
if direction not in range(1,10):
112+
raise ValueError("Invalid swapping direction.")
113+
if direction == 5:
114+
return mapping_in_int
115+
116+
if direction in [1,4,7]:
117+
coord2[1] -=1
118+
elif direction in [3,6,9]:
119+
coord2[1] +=1
120+
121+
if direction in [1,2,3]:
122+
coord2[0] -=1
123+
elif direction in [7,8,9]:
124+
coord2[0] +=1
125+
126+
the_map = execute_coordination_swap(coord1,coord2,mapping_in_int)
127+
return the_map
128+
129+
def execute_dict_swap(swapping_dict, mapping_in_int):
130+
for each_key in swapping_dict:
131+
execute_direction_swap(each_key,mapping_in_int,direction = swapping_dict[each_key])
132+
return mapping_in_int

0 commit comments

Comments
 (0)