Skip to content

Commit 7e9d4ac

Browse files
committed
added drug exclusion to drug_info and morgan fingerprint
1 parent 0bfce9a commit 7e9d4ac

File tree

1 file changed

+11
-0
lines changed

1 file changed

+11
-0
lines changed

scripts/prepare_data_for_improve.py

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -466,6 +466,12 @@ def process_datasets(args):
466466
out_df['PUBCHEM_ID'] = pd.to_numeric(out_df['PUBCHEM_ID'], errors='coerce', downcast='integer')
467467
out_df['PUBCHEM_ID'] = out_df['PUBCHEM_ID'].replace(0, None)
468468

469+
if args.EXCL_DRUGS_LIST is not None:
470+
logger.info(
471+
f"Removing all chemical compunds with ids: '{args.EXCL_DRUGS_LIST}'"
472+
)
473+
out_df = out_df[~out_df['improve_drug_id'].isin(args.EXCL_DRUGS_LIST)]
474+
469475
outfile_path = args.WORKDIR.joinpath(
470476
"data_out",
471477
"x_data",
@@ -537,6 +543,11 @@ def process_datasets(args):
537543
concat_drugs = pd.concat(dfs_to_merge.values())
538544
out_df = concat_drugs.reset_index()
539545
out_df = out_df.drop_duplicates(subset=['improve_drug_id'], keep='first')
546+
if args.EXCL_DRUGS_LIST is not None:
547+
logger.info(
548+
f"Removing all chemical compunds with ids: '{args.EXCL_DRUGS_LIST}'"
549+
)
550+
out_df = out_df[~out_df['improve_drug_id'].isin(args.EXCL_DRUGS_LIST)]
540551
out_df = pd.concat((out_df, out_df['morgan fingerprint'].astype(str).apply(lambda x: pd.Series(list(x))).astype(int).add_prefix('ecfp4.')), axis=1)
541552
out_df = out_df.drop(['morgan fingerprint'], axis=1)
542553
out_df.rename(

0 commit comments

Comments
 (0)