Skip to content

Commit c5b8e24

Browse files
committed
added output for drug_info.tsv
1 parent 871a0e0 commit c5b8e24

File tree

1 file changed

+35
-0
lines changed

1 file changed

+35
-0
lines changed

scripts/prepare_data_for_improve.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -439,6 +439,41 @@ def process_datasets(args):
439439
)
440440

441441

442+
#-------------------------------------------------------------------
443+
# create drug_info table: concatenate the per-dataset drug tables and write them to data_out/x_data/drug_info.tsv
444+
#-------------------------------------------------------------------
445+
446+
dfs_to_merge = {}
447+
for data_set in data_sets:
448+
if (data_sets[data_set].experiments is not None
449+
and data_sets[data_set].drugs is not None
450+
):
451+
dfs_to_merge[data_set] = deepcopy(data_sets[data_set].drugs)
452+
453+
concat_drugs = pd.concat(dfs_to_merge.values())
454+
out_df = deepcopy(concat_drugs)
455+
out_df['SMILES'] = concat_drugs['canSMILES']
456+
out_df['DrugID'] = concat_drugs['improve_drug_id']
457+
out_df['CAS_ID'] = None
458+
out_df.drop(['formula', 'weight', 'InChIKey'], axis=1, inplace=True)
459+
out_df = out_df[['DrugID', 'SMILES', 'canSMILES', 'chem_name', 'pubchem_id', 'CAS_ID', 'improve_drug_id']]
460+
out_df = out_df.rename(columns={'chem_name': 'NAME', 'pubchem_id': 'PUBCHEM_ID', 'improve_drug_id':'improve_chem_id'})
461+
out_df['PUBCHEM_ID'] = out_df['PUBCHEM_ID'].fillna(0)
462+
out_df['PUBCHEM_ID'] = pd.to_numeric(out_df['PUBCHEM_ID'], errors='coerce', downcast='integer')
463+
out_df['PUBCHEM_ID'] = out_df['PUBCHEM_ID'].replace(0, None)
464+
465+
outfile_path = args.WORKDIR.joinpath(
466+
"data_out",
467+
"x_data",
468+
"drug_info.tsv"
469+
)
470+
out_df.to_csv(
471+
path_or_buf=outfile_path,
472+
sep='\t',
473+
index=False,
474+
)
475+
476+
442477
#-------------------------------------------------------------------
443478
# create mordred table
444479
#-------------------------------------------------------------------

0 commit comments

Comments
 (0)