@@ -439,6 +439,41 @@ def process_datasets(args):
439
439
)
440
440
441
441
442
+ #-------------------------------------------------------------------
443
+ # create drug_info table
444
+ #-------------------------------------------------------------------
445
+
446
+ dfs_to_merge = {}
447
+ for data_set in data_sets :
448
+ if (data_sets [data_set ].experiments is not None
449
+ and data_sets [data_set ].drugs is not None
450
+ ):
451
+ dfs_to_merge [data_set ] = deepcopy (data_sets [data_set ].drugs )
452
+
453
+ concat_drugs = pd .concat (dfs_to_merge .values ())
454
+ out_df = deepcopy (concat_drugs )
455
+ out_df ['SMILES' ] = concat_drugs ['canSMILES' ]
456
+ out_df ['DrugID' ] = concat_drugs ['improve_drug_id' ]
457
+ out_df ['CAS_ID' ] = None
458
+ out_df .drop (['formula' , 'weight' , 'InChIKey' ], axis = 1 , inplace = True )
459
+ out_df = out_df [['DrugID' , 'SMILES' , 'canSMILES' , 'chem_name' , 'pubchem_id' , 'CAS_ID' , 'improve_drug_id' ]]
460
+ out_df = out_df .rename (columns = {'chem_name' : 'NAME' , 'pubchem_id' : 'PUBCHEM_ID' , 'improve_drug_id' :'improve_chem_id' })
461
+ out_df ['PUBCHEM_ID' ] = out_df ['PUBCHEM_ID' ].fillna (0 )
462
+ out_df ['PUBCHEM_ID' ] = pd .to_numeric (out_df ['PUBCHEM_ID' ], errors = 'coerce' , downcast = 'integer' )
463
+ out_df ['PUBCHEM_ID' ] = out_df ['PUBCHEM_ID' ].replace (0 , None )
464
+
465
+ outfile_path = args .WORKDIR .joinpath (
466
+ "data_out" ,
467
+ "x_data" ,
468
+ "drug_info.tsv"
469
+ )
470
+ out_df .to_csv (
471
+ path_or_buf = outfile_path ,
472
+ sep = '\t ' ,
473
+ index = False ,
474
+ )
475
+
476
+
442
477
#-------------------------------------------------------------------
443
478
# create mordred table
444
479
#-------------------------------------------------------------------
0 commit comments