@@ -233,7 +233,9 @@ def validate_software_mandatory_files(self, files_dict):
         self.log_report.print_log_report(method_name, ["valid", "warning"])
         return
 
-    def add_bioinfo_results_metadata(self, files_dict, j_data, batch_id, output_folder=None):
+    def add_bioinfo_results_metadata(
+        self, files_dict, j_data, batch_id, output_folder=None
+    ):
         """Adds metadata from bioinformatics results to j_data.
         It first calls file_handlers and then maps the handled
         data into j_data.
@@ -370,7 +372,12 @@ def handling_files(self, file_list, output_folder, batch_id):
             import_statement = f"import {utils_name}"
             exec(import_statement)
             # Get method name and execute it.
-            data = eval(utils_name + "." + func_name + "(file_list, batch_id, output_folder)")
+            data = eval(
+                utils_name
+                + "."
+                + func_name
+                + "(file_list, batch_id, output_folder)"
+            )
         except Exception as e:
             self.log_report.update_log_report(
                 self.add_bioinfo_results_metadata.__name__,
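The wrapped call above still resolves the handler through `exec`/`eval`. For reference, the same dotted-name dispatch can be done without `eval`; a minimal sketch, assuming `utils_name` is an importable module path and `func_name` names a callable defined in it:

```python
import importlib

def run_handler(utils_name, func_name, file_list, batch_id, output_folder):
    """Resolve <utils_name>.<func_name> at runtime and call it."""
    module = importlib.import_module(utils_name)  # import module by dotted path
    handler = getattr(module, func_name)          # look up the callable by name
    return handler(file_list, batch_id, output_folder)
```

`importlib.import_module` also makes the failure modes explicit: a missing module raises `ModuleNotFoundError` and a missing function raises `AttributeError`, both still caught by the surrounding `except Exception` block.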
@@ -751,12 +758,10 @@ def extract_batch_rows_to_file(file, sufix):
             sample_col = file_df.columns[sample_colpos]
             file_df[sample_col] = file_df[sample_col].astype(str)
             file_df = file_df[file_df[sample_col].isin(batch_samples)]
-
+
             base, ext = os.path.splitext(os.path.basename(file))
             new_filename = f"{base}_{sufix}{ext}"
-            output_path = os.path.join(
-                output_dir, "analysis_results", new_filename
-            )
+            output_path = os.path.join(output_dir, "analysis_results", new_filename)
             file_df.to_csv(output_path, index=False, sep=extdict.get(file_extension))
             return
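For context, `extract_batch_rows_to_file` filters a per-batch subset out of a multi-sample table and writes it with the extension-appropriate separator. A standalone sketch of the same flow; `SEPARATORS` stands in for the `extdict` mapping and `sample_col` for the configured sample-ID column, and the read step is assumed:

```python
import os
import pandas as pd

SEPARATORS = {".csv": ",", ".tsv": "\t"}  # stand-in for extdict

def extract_batch_rows(file, batch_samples, sufix, output_dir, sample_col):
    """Keep only rows whose sample ID belongs to the batch and save them
    under analysis_results/ with the batch suffix appended to the name."""
    base, ext = os.path.splitext(os.path.basename(file))
    sep = SEPARATORS.get(ext, ",")
    file_df = pd.read_csv(file, sep=sep)
    file_df[sample_col] = file_df[sample_col].astype(str)  # compare IDs as strings
    file_df = file_df[file_df[sample_col].isin(batch_samples)]
    output_path = os.path.join(output_dir, "analysis_results", f"{base}_{sufix}{ext}")
    file_df.to_csv(output_path, index=False, sep=sep)
```

The `astype(str)` cast matters: sample IDs that look numeric would otherwise fail the `isin` check against string IDs coming from the metadata.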
@@ -771,7 +776,7 @@ def extract_batch_rows_to_file(file, sufix):
             sample_colpos = self.get_sample_idx_colpos(key)
             for file in files:
                 try:
-                    extract_batch_rows_to_file(file,sufix)
+                    extract_batch_rows_to_file(file, sufix)
                 except Exception as e:
                     if self.software_config[key].get("required"):
                         log_type = "error"
@@ -794,17 +799,22 @@ def merge_metadata(self, batch_filepath, batch_data):
             batch_data (dict): A dictionary containing metadata of the samples.
         Returns:
             None
-        """
+        """
         merged_metadata = relecov_tools.utils.read_json_file(batch_filepath)
-        prev_metadata_dict = {item["sequencing_sample_id"]: item for item in merged_metadata}
+        prev_metadata_dict = {
+            item["sequencing_sample_id"]: item for item in merged_metadata
+        }
         for item in batch_data:
             sample_id = item["sequencing_sample_id"]
             if sample_id in prev_metadata_dict:
                 # When sample already in metadata, checking whether dictionary is the same
                 if prev_metadata_dict[sample_id] != item:
-                    stderr.print(f"[red] Sample {sample_id} has different data in {batch_filepath} and new metadata. Can't merge.")
+                    stderr.print(
+                        f"[red] Sample {sample_id} has different data in {batch_filepath} and new metadata. Can't merge."
+                    )
                     log.error(
-                        "Sample %s has different data in %s and new metadata. Can't merge." % (sample_id, batch_filepath)
+                        "Sample %s has different data in %s and new metadata. Can't merge."
+                        % (sample_id, batch_filepath)
                     )
                     sys.exit(1)
             else:
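The merge strategy above indexes existing records by `sequencing_sample_id` and refuses to merge on conflicting duplicates. A minimal self-contained sketch of that policy (plain `sys.exit` in place of the `stderr`/`log` reporting):

```python
import sys

def merge_by_sample_id(previous, incoming):
    """Merge two lists of sample dicts keyed on sequencing_sample_id.
    Identical duplicates are kept once; conflicting duplicates abort."""
    merged = {item["sequencing_sample_id"]: item for item in previous}
    for item in incoming:
        sample_id = item["sequencing_sample_id"]
        if sample_id in merged:
            if merged[sample_id] != item:  # same ID, different content
                sys.exit(f"Sample {sample_id} has conflicting data. Can't merge.")
        else:
            merged[sample_id] = item
    return list(merged.values())
```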
@@ -842,11 +852,18 @@ def save_splitted_files(self, files_dict, batch_date, output_folder=None):
                 continue
             try:
                 # Dynamically import the function from the specified module
-                utils_name = f"relecov_tools.assets.pipeline_utils.{self.software_name}"
+                utils_name = (
+                    f"relecov_tools.assets.pipeline_utils.{self.software_name}"
+                )
                 import_statement = f"import {utils_name}"
                 exec(import_statement)
                 # Get method name and execute it.
-                data = eval(utils_name + "." + func_name + "(file_path, batch_date, output_folder)")
+                data = eval(
+                    utils_name
+                    + "."
+                    + func_name
+                    + "(file_path, batch_date, output_folder)"
+                )
             except Exception as e:
                 self.log_report.update_log_report(
                     self.save_splitted_files.__name__,
@@ -855,7 +872,7 @@ def save_splitted_files(self, files_dict, batch_date, output_folder=None):
             )
             sys.exit(self.log_report.print_log_report(method_name, ["error"]))
         return
-
+
     def get_multiple_sample_files(self):
         method_name = f"{self.add_bioinfo_files_path.__name__}:{self.get_multiple_sample_files.__name__}"
         multiple_sample_files = []
@@ -880,16 +897,20 @@ def create_bioinfo_file(self):
         # Split files found based on each batch of samples
         data_by_batch = self.split_data_by_batch(self.j_data)
         batch_dates = []
-        #Get batch date for all the samples
+        # Get batch date for all the samples
         for batch_dir, batch_dict in data_by_batch.items():
             if batch_dir.split("/")[-1] not in batch_dates:
                 batch_dates.append(batch_dir.split("/")[-1])
 
         if len(batch_dates) == 1:
             batch_dates = str(batch_dates[0])
         else:
-            stderr.print(f"[orange]More than one batch date in the same json data. Using current date as batch date.")
-            log.info("]More than one batch date in the same json data. Using current date as batch date.")
+            stderr.print(
+                "[orange]More than one batch date in the same json data. Using current date as batch date."
+            )
+            log.info(
+                "More than one batch date in the same json data. Using current date as batch date."
+            )
             batch_dates = datetime.now().strftime("%Y%m%d%H%M%S")
 
         # Add bioinfo metadata to j_data
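The batch-date logic above keys on the batch folder name: one unique date is used as-is, while mixed dates fall back to a generated timestamp. A compact sketch of that decision, assuming each batch directory is named after its date:

```python
from datetime import datetime

def resolve_batch_date(batch_dirs):
    """Return the shared batch date, or a fresh timestamp when batches disagree."""
    dates = {d.split("/")[-1] for d in batch_dirs}  # folder name carries the date
    if len(dates) == 1:
        return dates.pop()
    return datetime.now().strftime("%Y%m%d%H%M%S")  # e.g. 20240131120000
```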
@@ -917,8 +938,13 @@ def create_bioinfo_file(self):
             batch_filename = tag + lab_code + "_" + batch_date + ".json"
             batch_filepath = os.path.join(batch_dir, batch_filename)
             if os.path.exists(batch_filepath):
-                stderr.print(f"[blue]Bioinfo metadata {batch_filepath} file already exists. Merging new data if possible.")
-                log.info("Bioinfo metadata %s file already exists. Merging new data if possible." % batch_filepath)
+                stderr.print(
+                    f"[blue]Bioinfo metadata {batch_filepath} file already exists. Merging new data if possible."
+                )
+                log.info(
+                    "Bioinfo metadata %s file already exists. Merging new data if possible."
+                    % batch_filepath
+                )
                 batch_data = self.merge_metadata(batch_filepath, batch_data)
             else:
                 relecov_tools.utils.write_json_fo_file(batch_data, batch_filepath)
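Both this hunk and the next follow the same merge-or-write pattern: reuse an existing batch JSON via `merge_metadata` when present, otherwise write a fresh file. A sketch with stdlib `json` standing in for `relecov_tools.utils.write_json_fo_file`:

```python
import json
import os

def merge_or_write(filepath, data, merge):
    """Merge `data` into an existing JSON file via `merge`, or create it."""
    if os.path.exists(filepath):
        return merge(filepath, data)  # conflict-aware merge (merge_metadata here)
    with open(filepath, "w") as fh:
        json.dump(data, fh, indent=4)
    return data
```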
@@ -941,8 +967,13 @@ def create_bioinfo_file(self):
         stderr.print("[blue]Writing output json file")
         file_path = os.path.join(out_path, batch_filename)
         if os.path.exists(file_path):
-            stderr.print(f"[blue]Bioinfo metadata {file_path} file already exists. Merging new data if possible.")
-            log.info("Bioinfo metadata %s file already exists. Merging new data if possible." % file_path)
+            stderr.print(
+                f"[blue]Bioinfo metadata {file_path} file already exists. Merging new data if possible."
+            )
+            log.info(
+                "Bioinfo metadata %s file already exists. Merging new data if possible."
+                % file_path
+            )
             batch_data = self.merge_metadata(file_path, self.j_data)
         else:
             relecov_tools.utils.write_json_fo_file(self.j_data, file_path)