Skip to content

Commit 1436bc6

Browse files
committed
added processing for drug_SMILES.tsv
1 parent c6b598a commit 1436bc6

File tree

1 file changed

+30
-3
lines changed

1 file changed

+30
-3
lines changed

scripts/prepare_data_for_improve.py

Lines changed: 30 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -357,7 +357,7 @@ def process_datasets(args):
357357
outfile_path = args.WORKDIR.joinpath(
358358
"data_out",
359359
"x_data",
360-
"cancer_discretized_copy_number.tsv.tsv"
360+
"cancer_discretized_copy_number.tsv"
361361
)
362362
(discretized_copy_number
363363
.transpose()
@@ -367,10 +367,37 @@ def process_datasets(args):
367367
header=False
368368
)
369369
)
370-
# join the "meta data tables" like copynumber etc.
370+
371+
#-------------------------------------------------------------------
372+
# create SMILES table
373+
#-------------------------------------------------------------------
374+
375+
dfs_to_merge = {}
376+
for data_set in data_sets:
377+
if (data_sets[data_set].experiments is not None
378+
and data_sets[data_set].drugs is not None
379+
):
380+
dfs_to_merge[data_set] = deepcopy(data_sets[data_set].drugs)
381+
382+
concat_drugs = pd.concat(dfs_to_merge.values())
383+
out_df = concat_drugs[['improve_drug_id','canSMILES']].drop_duplicates()
384+
out_df.rename(
385+
columns={'improve_drug_id': 'improve_chem_id'},
386+
inplace=True,
387+
)
388+
389+
outfile_path = args.WORKDIR.joinpath(
390+
"data_out",
391+
"x_data",
392+
"drug_SMILES.tsv"
393+
)
394+
out_df.to_csv(
395+
path_or_buf=outfile_path,
396+
sep='\t',
397+
index=False,
398+
)
371399

372400

373-
374401

375402
def split_data_sets(
376403
args: dict,

0 commit comments

Comments
 (0)