@@ -209,12 +209,12 @@ def process_datasets(args):
209
209
#-------------------------------------------------------------------
210
210
211
211
212
- split_data_sets (
213
- args = args ,
214
- data_sets = data_sets ,
215
- data_sets_info = data_sets_info ,
216
- response_data = response_data
217
- )
212
+ # split_data_sets(
213
+ # args=args,
214
+ # data_sets=data_sets,
215
+ # data_sets_info=data_sets_info,
216
+ # response_data=response_data
217
+ # )
218
218
219
219
#-------------------------------------------------------------------
220
220
# getting common / reference gene symbols
@@ -439,6 +439,48 @@ def process_datasets(args):
439
439
)
440
440
441
441
442
+ #-------------------------------------------------------------------
443
+ # create mordred table
444
+ #-------------------------------------------------------------------
445
+
446
+ dfs_to_merge = {}
447
+ for data_set in data_sets :
448
+ if (data_sets [data_set ].experiments is not None
449
+ and data_sets [data_set ].drug_descriptors is not None
450
+ ):
451
+ df_tmp = data_sets [data_set ].format (data_type = 'drug_descriptor' , shape = 'wide' )
452
+ df_tmp = df_tmp .drop (columns = ['morgan fingerprint' ]).add_prefix ('mordred.' )
453
+ dfs_to_merge [data_set ] = df_tmp
454
+
455
+ concat_drugs = pd .concat (dfs_to_merge .values ())
456
+ concat_drugs = concat_drugs .replace ({'False' : '0' , 'True' : '1' })
457
+ cols = concat_drugs .columns
458
+ concat_drugs [cols ] = concat_drugs [cols ].apply (pd .to_numeric , errors = 'coerce' )
459
+ out_df = concat_drugs .reset_index ()
460
+ out_df = out_df .fillna (0 ).round (4 ).drop_duplicates (subset = ['improve_drug_id' ], keep = 'first' )
461
+
462
+ if args .EXCL_DRUGS_LIST is not None :
463
+ logger .info (
464
+ f"Removing all chemical compunds with ids: '{ args .EXCL_DRUGS_LIST } '"
465
+ )
466
+ out_df = out_df [~ out_df ['improve_drug_id' ].isin (args .EXCL_DRUGS_LIST )]
467
+
468
+ out_df .rename (
469
+ columns = {'improve_drug_id' : 'improve_chem_id' },
470
+ inplace = True ,
471
+ )
472
+
473
+ outfile_path = args .WORKDIR .joinpath (
474
+ "data_out" ,
475
+ "x_data" ,
476
+ "drug_mordred.tsv"
477
+ )
478
+ out_df .to_csv (
479
+ path_or_buf = outfile_path ,
480
+ sep = '\t ' ,
481
+ index = False ,
482
+ )
483
+
442
484
#-------------------------------------------------------------------
443
485
# create mutation count table
444
486
#-------------------------------------------------------------------
@@ -514,6 +556,7 @@ def process_datasets(args):
514
556
index = False
515
557
)
516
558
559
+
517
560
def split_data_sets (
518
561
args : dict ,
519
562
data_sets : dict ,
0 commit comments