Add files via upload

omidbazgirTTU · web-flow · commit 075e986b8845 · 2019-11-16T17:41:39.000-06:00
diff --git a/mpiHill_Hardcoded.py b/mpiHill_Hardcoded.py
@@ -0,0 +1,113 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Sat Aug 10 18:04:36 2019
+
+@authors: Ruibzhan & Omid Bazgir
+"""
+
+# This code uses the Message Passing Interface (MPI) bindings for Python to run the hill climbing section of REFINED efficiently on an HPCC. To run this code, make sure the mpi4py library is installed.
+# Some functions needed by this code are defined in paraHill.py, which evaluates and executes the feature swaps.
+from mpi4py import MPI 
+import paraHill
+import pickle
+import numpy as np
+from itertools import product
+
+#%% MPI set up 
+# Every rank executes this script; further down rank 0 acts as the master and all other ranks act as workers.
+comm = MPI.COMM_WORLD
+my_rank = comm.Get_rank()  # rank of this process inside COMM_WORLD
+n_processors = comm.Get_size()  # total number of processes in COMM_WORLD (master + workers)
+# NOTE(review): this print is not guarded by a rank check, so every process emits it once.
+print("Processors found: ",n_processors)
+
+# Distributing the input data among the processors for parallel processing
+def scatter_list_to_processors(comm, data_list, n_processors):
+    """Split ``data_list`` into contiguous chunks and send one chunk to each worker.
+
+    Rank 0 is treated as the master and receives nothing. Each rank in
+    ``1 .. n_processors - 1`` gets exactly one ``comm.send`` call, so every
+    worker can post exactly one matching receive. Chunks hold
+    ``ceil(len(data_list) / n_workers)`` items; once the data runs out the
+    remaining workers get a shorter or empty chunk.
+
+    Parameters
+    ----------
+    comm : object exposing ``send(obj, dest=...)``
+        The communicator used to send the chunks.
+    data_list : sequence
+        Sliceable sequence supporting ``len()``.
+    n_processors : int
+        Total number of ranks, including the master.
+
+    Returns
+    -------
+    bool
+        Always ``True``.
+
+    Raises
+    ------
+    ValueError
+        If ``n_processors`` is smaller than 2, i.e. there is no worker rank.
+    """
+    import math
+
+    n_workers = n_processors - 1
+    if n_workers < 1:
+        # Without this guard the chunk-size computation divides by zero.
+        raise ValueError("At least 2 processors are required (1 master + 1 worker).")
+
+    heap_size = math.ceil(len(data_list) / n_workers)
+    for pidx in range(1, n_processors):
+        # Slicing past the end of a sequence yields a shorter/empty result
+        # instead of raising, so no fallback branch is needed.
+        heap = data_list[heap_size * (pidx - 1):heap_size * pidx]
+        comm.send(heap, dest=pidx)
+
+    return True
+
+# Receive one dict from each worker processor and merge them into a single dict
+def receive_from_processors_to_dict(comm, n_processors):
+    """Receive one dict from every worker rank and merge them into a single dict.
+
+    Ranks ``1 .. n_processors - 1`` are polled in ascending order with
+    ``comm.recv(source=rank)``. On duplicate keys the value received from the
+    higher rank overwrites the earlier one.
+
+    Parameters
+    ----------
+    comm : object exposing ``recv(source=...)``
+    n_processors : int
+        Total number of ranks, including the master (rank 0).
+
+    Returns
+    -------
+    dict
+        Union of all received dicts; empty when there are no worker ranks.
+    """
+    # Lazy generator: each recv is issued only when the loop asks for it.
+    replies = (comm.recv(source=rank) for rank in range(1, n_processors))
+    merged = {}
+    for reply in replies:
+        merged.update(reply)
+    return merged
+
+#%% load data
+# NOTE(review): pickle.load can execute arbitrary code; only load an 'Init_MDS_Euc.pickle' written by a trusted run.
+with open('Init_MDS_Euc.pickle','rb') as file:  # Hill climbing input (initial MDS output)
+    gene_names,dist_matr,init_map = pickle.load(file)
+
+Nn = len(gene_names)  # Number of features
+
+NI = 5  # Number of hill climbing iterations
+
+# Only square maps are supported for now
+if init_map.shape[0] != init_map.shape[1]:
+    raise ValueError("For now only square images are considered.")
+
+nn = init_map.shape[0]  # Side length of the square output image
+
+# Convert feature labels from string to integer in the initial MDS map, e.g. 'F34' becomes 34
+init_map = np.char.strip(init_map.astype(str),'F').astype(int)
+map_in_int = init_map
+
+# The 9 (row, column) offsets of a 3*3 window. Master and workers iterate over this same list so that
+# every bcast/send issued by the master is matched by exactly one bcast/recv on each worker.
+init_coords = list(product([0,1,2],repeat = 2))
+
+#%% Hill climbing
+Dist_evol = []  # Cost recorded after each iteration
+if my_rank == 0:
+    # Master: publishes the map, hands out centroids, applies the swaps proposed by the workers
+    print("Initial distance: >>>",paraHill.universial_corr(dist_matr,map_in_int))
+    for n_iter in range(NI):
+        for init_coord in init_coords:
+            # Publish the current map to all workers
+            comm.bcast(map_in_int,root = 0)
+            # Centroids: every third cell in each direction, starting at this window offset
+            xxx = list(range(init_coord[0],nn,3))
+            yyy = list(range(init_coord[1],nn,3))
+            centr_list = list(product(xxx,yyy))
+            # Master send and recv
+            scatter_list_to_processors(comm,centr_list,n_processors)  # scatter centroids
+            swap_dict = receive_from_processors_to_dict(comm,n_processors)  # collect proposed swaps
+            print(swap_dict)
+            map_in_int = paraHill.execute_dict_swap(swap_dict, map_in_int)  # apply the proposed swaps
+
+            print(">",init_coord,"Corr:",paraHill.universial_corr(dist_matr,map_in_int))
+
+        # Compute the cost once per iteration and reuse it for both the report and the history
+        iter_corr = paraHill.universial_corr(dist_matr,map_in_int)
+        print(">>>",n_iter,"Corr:",iter_corr)
+        Dist_evol.append(iter_corr)
+
+    # Final REFINED coordinates: position of each feature index on the map
+    coords = np.array([[item[0] for item in np.where(map_in_int == ii)] for ii in range(Nn)])
+    # Save the REFINED coordinates (this line was tab-indented inside a space-indented block, which Python 3 rejects)
+    with open("theMapping.pickle",'wb') as file:
+        pickle.dump([gene_names,coords,map_in_int],file)
+    import pandas as pd
+    pd.Series(Dist_evol).to_csv("Distance_evolution.csv")  # Save the cost history
+else:
+    # Workers: receive the current map and a chunk of centroids, reply with the best swap per centroid
+    for n_iter in range(NI):
+        for _ in init_coords:
+            # On non-root ranks the first argument of bcast is ignored; the return value is the master's map
+            map_in_int = comm.bcast(None,root = 0)
+
+            centr_list = comm.recv(source = 0)
+            each_swap_dict = paraHill.evaluate_centroids_in_list(centr_list,dist_matr,map_in_int)
+            comm.send(each_swap_dict,dest = 0)
+
+# The original referenced MPI.Finalize without calling it, which is a no-op expression
+MPI.Finalize()
diff --git a/paraHill.py b/paraHill.py
@@ -0,0 +1,132 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Aug 12 14:30:06 2019
+
+@author: Ruibzhan & Omid Bazgir
+"""
+
+from scipy.stats import pearsonr
+import numpy as np
+import random
+from scipy.spatial import distance
+import pickle
+import pandas as pd
+import time
+from itertools import product
+
+#%%
+def universial_corr(dist_matr, mapping_in_int):
+    """Relative L2 mismatch between feature distances and pixel distances on the map.
+
+    For every feature index ``0 .. Nn-1`` the first position where it occurs in
+    ``mapping_in_int`` is looked up. The pairwise Euclidean distances between
+    those positions are compared with the condensed form of ``dist_matr``:
+
+        sqrt( sum((pixel - feature)**2) / sum(feature**2) )
+
+    Despite the name, this is not a correlation; smaller values mean a closer
+    match.
+
+    Parameters
+    ----------
+    dist_matr : ndarray, shape (Nn, Nn)
+        Square distance matrix between features. It is passed to
+        ``scipy.spatial.distance.squareform``, which expects a symmetric
+        matrix with a zero diagonal.
+    mapping_in_int : ndarray
+        Map whose cells hold integer feature indices. Every index in
+        ``range(Nn)`` must occur at least once, otherwise the position lookup
+        raises ``IndexError``.
+
+    Returns
+    -------
+    ndarray, shape (1,)
+        The mismatch value, kept as a 1-element array.
+    """
+    Nn = dist_matr.shape[0]
+    # np.where returns one index array per axis; item[0] keeps the first match on each axis
+    coord = np.array([[item[0] for item in np.where(mapping_in_int == ii)] for ii in range(Nn)])
+    # Condensed (1-d) pairwise distances between pixel positions, as a column vector
+    pixel_dist = distance.pdist(coord).reshape(-1, 1)
+    # Condensed form of the square feature distance matrix, in the same pair order as pdist
+    feature_dist = distance.squareform(dist_matr).reshape(-1, 1)
+    # np.sum over axis 0 replaces the builtin sum, which looped over the rows in Python;
+    # the result keeps the (1,) shape
+    squared_error = np.sum((pixel_dist - feature_dist)**2, axis=0)
+    squared_scale = np.sum(feature_dist**2, axis=0)
+    L2_Norm = np.sqrt(squared_error/squared_scale)
+    return L2_Norm
+#%%
+def evaluate_swap(coord1,coord2,dist_matr,mapping_in_int,original_corr = -2):
+    """Return the change in ``universial_corr`` caused by swapping two map cells.
+
+    The swap is performed on a copy, so ``mapping_in_int`` is left untouched.
+
+    Parameters
+    ----------
+    coord1, coord2 : sequence of two ints
+        ``(row, column)`` positions of the cells to swap.
+    dist_matr : ndarray
+        Feature distance matrix, forwarded to ``universial_corr``.
+    mapping_in_int : ndarray, 2-d
+        Current map of integer feature indices.
+    original_corr : number or 1-element array, optional
+        Cost of the map before the swap, to save one evaluation. Any value
+        outside ``[-1, 1]`` (including the default ``-2``) is treated as
+        "not given" and the cost is recomputed from the unswapped map.
+
+    Returns
+    -------
+    float or ndarray
+        ``cost_after_swap - cost_before_swap``, or ``numpy.nan`` if either
+        coordinate lies outside the map.
+
+    Raises
+    ------
+    Warning
+        If indexing the map still fails with ``IndexError`` after the bounds
+        checks have passed.
+    """
+    n_rows, n_cols = mapping_in_int.shape[0], mapping_in_int.shape[1]
+    # Out of bound -> NaN. Rows are checked against the row count and columns against the
+    # column count; the original compared both against shape[0].
+    for coord in (coord1, coord2):
+        if coord[0]<0 or coord[1]<0 or coord[0]>=n_rows or coord[1]>=n_cols:
+            return np.nan
+
+    # Copy only after the cheap checks, so rejected swaps cost nothing
+    the_map = mapping_in_int.copy()
+    # If not given, recompute
+    if original_corr<-1 or original_corr>1:
+        original_corr = universial_corr(dist_matr,the_map)
+
+    first = (coord1[0],coord1[1])
+    second = (coord2[0],coord2[1])
+    try:
+        the_map[first], the_map[second] = the_map[second], the_map[first]
+    except IndexError as err:
+        raise Warning ("Swap index:", coord1,coord2,"Index error. Check the coordnation.") from err
+
+    changed_corr = universial_corr(dist_matr,the_map)
+    return changed_corr - original_corr
+    
+def evaluate_centroid(centroid,dist_matr,mapping_in_int):
+    """Find the best of the 9 swaps between ``centroid`` and its 3*3 neighbourhood.
+
+    Each candidate is scored with ``evaluate_swap``; the candidate with the
+    smallest cost change wins, and candidates that evaluate to NaN are
+    ignored. Ties go to the lowest direction number.
+
+    Parameters
+    ----------
+    centroid : sequence of two ints
+        ``(row, column)`` of the centre cell.
+    dist_matr : ndarray
+        Feature distance matrix.
+    mapping_in_int : ndarray
+        Current map of integer feature indices.
+
+    Returns
+    -------
+    int
+        Direction number ``1 .. 9`` in row-major order of the offsets
+        ``(-1,-1), (-1,0), (-1,1), (0,-1), (0,0), ...``; 5 is "stay in place".
+        0 can only be returned if every candidate is NaN or increases the
+        cost by more than the placeholder value.
+    """
+    original_corr = universial_corr(dist_matr,mapping_in_int)
+    # Placeholder at index 0 so that list positions line up with direction numbers 1..9
+    results = [100000.0]
+    for each_direc in product([-1,0,1],repeat = 2):
+        swap_coord = [centroid[0]+each_direc[0],centroid[1]+each_direc[1]]
+        evaluation = evaluate_swap(centroid,swap_coord,dist_matr,mapping_in_int,original_corr)
+        # evaluate_swap may hand back a 1-element array or a bare NaN; reduce both to a plain
+        # float so the list is homogeneous (recent NumPy refuses to build ragged arrays)
+        results.append(float(np.ravel(evaluation)[0]))
+    # First index of the smallest non-NaN entry
+    best_swap_direc = int(np.nanargmin(np.array(results)))
+    return best_swap_direc
+
+def evaluate_centroids_in_list(centroids_list,dist_matr,mapping_in_int):
+    """Evaluate every centroid in ``centroids_list`` and collect the results in a dict.
+
+    Parameters
+    ----------
+    centroids_list : iterable of coordinate sequences
+    dist_matr, mapping_in_int
+        Forwarded unchanged to ``evaluate_centroid``.
+
+    Returns
+    -------
+    dict
+        Maps ``tuple(centroid)`` to the value returned by ``evaluate_centroid``
+        for that centroid. If a centroid occurs more than once, the last
+        evaluation is kept.
+    """
+    # Keys are converted to tuples so they are hashable
+    return {
+        centr_key: evaluate_centroid(centr_key,dist_matr,mapping_in_int)
+        for centr_key in map(tuple, centroids_list)
+    }
+
+#%%
+def execute_coordination_swap(coord1,coord2,mapping_in_int):
+    """Swap two cells of ``mapping_in_int`` in place and return the same array.
+
+    Parameters
+    ----------
+    coord1, coord2 : sequence of two ints
+        ``(row, column)`` positions of the cells to exchange.
+    mapping_in_int : ndarray, 2-d
+        Map to modify. No copy is made, so the caller's array is changed.
+
+    Returns
+    -------
+    ndarray
+        ``mapping_in_int`` itself, after the swap.
+
+    Raises
+    ------
+    Warning
+        If either coordinate is negative or lies outside the map. The map is
+        left unchanged in that case.
+    """
+    # Work on the caller's array directly so the swap is visible outside
+    the_map = mapping_in_int
+    if coord1[0]<0 or coord1[1]<0 or coord2[0]<0 or coord2[1]<0:
+        raise Warning("Swapping failed:",coord1,coord2,"-- Negative coordnation.")
+    n_rows, n_cols = the_map.shape[0], the_map.shape[1]
+    # Valid indices run from 0 to size-1, so an index equal to the size is already out of
+    # bound (the original used '>' and compared columns against the row count)
+    if coord1[0]>=n_rows or coord1[1]>=n_cols or coord2[0]>=n_rows or coord2[1]>=n_cols:
+        raise Warning("Swapping failed:",coord1,coord2,"-- Coordnation out of bound.")
+
+    first = (coord1[0],coord1[1])
+    second = (coord2[0],coord2[1])
+    the_map[first], the_map[second] = the_map[second], the_map[first]
+
+    return the_map
+
+# Initial centroid id & swapping direction:
+# 1 2 3
+# 4 5 6
+# 7 8 9
+# 0 is not a valid swapping direction; it is the index of the placeholder entry at the head of the results list.
+
+def execute_direction_swap(centroid,mapping_in_int,direction = 5):
+    """Swap ``centroid`` with the neighbour selected by ``direction``.
+
+    Directions are numbered row by row over the 3*3 neighbourhood::
+
+        1 2 3
+        4 5 6
+        7 8 9
+
+    Index [0] of a coordinate is the vertical position and [1] the horizontal
+    one, so 2 is the cell above and 4 the cell to the left. Direction 5 is the
+    centroid itself and leaves the map untouched.
+
+    Parameters
+    ----------
+    centroid : sequence of ints
+        ``(row, column)`` of the centre cell.
+    mapping_in_int : ndarray
+        Map handed on to ``execute_coordination_swap``.
+    direction : int, optional
+        Value in ``1 .. 9``; defaults to 5 (no swap).
+
+    Returns
+    -------
+    The map returned by ``execute_coordination_swap``, or ``mapping_in_int``
+    itself when ``direction`` is 5.
+
+    Raises
+    ------
+    ValueError
+        If ``direction`` is not in ``1 .. 9``.
+    """
+    if direction not in range(1,10):
+        raise ValueError("Invalid swapping direction.")
+    if direction == 5:
+        return mapping_in_int
+
+    # (row offset, column offset) for every direction that actually moves
+    offsets = {
+        1: (-1, -1), 2: (-1, 0), 3: (-1, 1),
+        4: (0, -1), 6: (0, 1),
+        7: (1, -1), 8: (1, 0), 9: (1, 1),
+    }
+    row_step, col_step = offsets[direction]
+
+    source = list(centroid)
+    target = list(centroid)
+    target[0] += row_step
+    target[1] += col_step
+
+    return execute_coordination_swap(source,target,mapping_in_int)
+
+def execute_dict_swap(swapping_dict, mapping_in_int):
+    """Apply every ``centroid -> direction`` entry of ``swapping_dict`` to the map.
+
+    Entries are processed in the dict's iteration order by calling
+    ``execute_direction_swap`` for each one. The return value of those calls
+    is not used; ``mapping_in_int`` itself is returned once all entries have
+    been processed.
+    """
+    for centroid, direction in swapping_dict.items():
+        execute_direction_swap(centroid,mapping_in_int,direction = direction)
+    return mapping_in_int