@@ -209,12 +209,12 @@ def process_datasets(args):
209
209
#-------------------------------------------------------------------
210
210
211
211
212
- # split_data_sets(
213
- # args=args,
214
- # data_sets=data_sets,
215
- # data_sets_info=data_sets_info,
216
- # response_data=response_data
217
- # )
212
# Run the data set split, handing over the parsed arguments together
# with the data set objects, their info, and the response data.
split_data_sets(
    args=args,
    data_sets=data_sets,
    data_sets_info=data_sets_info,
    response_data=response_data,
)
218
218
219
219
#-------------------------------------------------------------------
220
220
# getting common / reference gene symbols
@@ -481,6 +481,41 @@ def process_datasets(args):
481
481
index = False ,
482
482
)
483
483
484
+
485
#-------------------------------------------------------------------
# create morgan table
#
# Collects the 'morgan fingerprint' drug descriptor of every data set
# that has both experiments and drug descriptors, keeps one row per
# 'improve_drug_id', expands each fingerprint into one integer column
# per character ('ecfp4.0', 'ecfp4.1', ...), renames the ID column to
# 'improve_chem_id' and writes the table as a tab-separated file.
#-------------------------------------------------------------------

dfs_to_merge = {}
for data_set in data_sets:
    current = data_sets[data_set]
    # only data sets carrying both experiments and drug descriptors
    # contribute fingerprints
    if current.experiments is None or current.drug_descriptors is None:
        continue
    df_tmp = current.format(data_type='drug_descriptor', shape='wide')
    dfs_to_merge[data_set] = df_tmp['morgan fingerprint']

# pd.concat fails with an opaque "No objects to concatenate" ValueError
# on empty input; raise the same exception type with a message that
# names the actual cause.
if not dfs_to_merge:
    raise ValueError(
        "cannot create morgan table: no data set has both "
        "experiments and drug descriptors"
    )

concat_drugs = pd.concat(dfs_to_merge.values())
out_df = concat_drugs.reset_index()
# keep a single row per drug ID; the first occurrence wins
out_df = out_df.drop_duplicates(subset=['improve_drug_id'], keep='first')

# Expand every fingerprint into one integer column per character.
# Each character must therefore be parseable as an int (presumably a
# '0'/'1' bit string -- confirm against the descriptor source).
# Building the frame from a list of lists avoids creating one
# pd.Series per row; passing out_df.index keeps the new columns
# aligned with out_df, whose index may have gaps after
# drop_duplicates.
fingerprints = out_df['morgan fingerprint'].astype(str)
bit_columns = pd.DataFrame(
    [list(fingerprint) for fingerprint in fingerprints],
    index=out_df.index,
).astype(int).add_prefix('ecfp4.')

out_df = pd.concat(
    (out_df.drop(columns=['morgan fingerprint']), bit_columns),
    axis=1,
)
out_df = out_df.rename(columns={'improve_drug_id': 'improve_chem_id'})

# NOTE(review): the file name advertises 1024 bits, but nothing here
# checks the fingerprint length -- confirm it is enforced upstream.
outfile_path = args.WORKDIR.joinpath(
    "data_out",
    "x_data",
    "drug_ecfp4_nbits1024.tsv"
)
out_df.to_csv(
    path_or_buf=outfile_path,
    # to_csv requires a single-character delimiter; a plain tab
    # matches the .tsv target
    sep='\t',
    index=False,
)
518
+
484
519
#-------------------------------------------------------------------
485
520
# create mutation count table
486
521
#-------------------------------------------------------------------
0 commit comments