Skip to content

Commit a33d7a4

Browse files
author
RubyFore
committed
Finishing up data prep for bladder pdos
1 parent 58aed7f commit a33d7a4

11 files changed

+170
-12
lines changed

build/bladderpdo/00_createBladderPDOSampleFile.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,4 +47,4 @@ def get_bladder_pdo_samples(synLoginObject, maxval):
4747

4848
bladder_pdo_samples = get_bladder_pdo_samples(synObject, prev_max_improve_id)
4949

50-
bladder_pdo_samples.to_csv("bladderpdo_samples.csv", index=False)
50+
bladder_pdo_samples.to_csv("/tmp/bladderpdo_samples.csv", index=False)

build/bladderpdo/01_createBladderPDOOmicsFiles.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -123,10 +123,10 @@ def get_bladder_pdo_copynumber(synObject, samples, genes):
123123
samples = pd.read_csv(args.samples)
124124

125125
if args.expression:
126-
get_bladder_pdo_transcriptomics(args.geolink, samples, genes).to_csv("bladderpdo_transcriptomics.csv", index=False)
126+
get_bladder_pdo_transcriptomics(args.geolink, samples, genes).to_csv("/tmp/bladderpdo_transcriptomics.csv", index=False)
127127

128128
if args.mutation:
129-
get_bladder_pdo_mutations(synObject, samples, genes).to_csv('bladderpdo_mutations.csv', index=False)
129+
get_bladder_pdo_mutations(synObject, samples, genes).to_csv('/tmp/bladderpdo_mutations.csv', index=False)
130130

131131
if args.copy:
132-
get_bladder_pdo_copynumber(synObject, samples, genes).to_csv("bladderpdo_copynumber.csv", index=False)
132+
get_bladder_pdo_copynumber(synObject, samples, genes).to_csv("/tmp/bladderpdo_copynumber.csv", index=False)
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
import synapseclient
2+
import pandas as pd
3+
4+
5+
def get_bladder_pdo_experiments(synObject, samples, drugs):
    """Build the long-format dose-response table for the bladder PDO drug screens.

    Lists the files under Synapse folder ``syn64765430``, loads the sample-ID
    conversion spreadsheet, then reshapes every per-drug table into one row
    per (dose, sample) measurement annotated with improve sample/drug IDs.

    Args:
        synObject: Logged-in Synapse client; only ``getChildren`` and ``get``
            are called on it.
        samples: DataFrame whose ``common_name`` and ``improve_sample_id``
            columns are used to map samples.
        drugs: DataFrame whose ``improve_drug_id`` and ``chem_name`` columns
            are used to map drugs.

    Returns:
        DataFrame with columns ``DOSE``, ``GROWTH``, ``improve_sample_id``,
        ``Drug``, ``study``, ``source``, ``time`` and ``time_unit`` holding
        the rows of *all* processed drug files (freshly numbered index).
        Empty (but with those columns) when no drug file is processed.
    """
    output_columns = [
        'DOSE', 'GROWTH', 'improve_sample_id', 'Drug',
        'study', 'source', 'time', 'time_unit',
    ]

    # get list of syn id info (use the client that was passed in; there is no
    # global named `syn` in this module)
    files = list(synObject.getChildren(parent='syn64765430', includeTypes=['file']))

    # load conversion table
    # NOTE(review): the spreadsheet is picked by position (index 50) in the
    # folder listing -- confirm the listing order is stable.
    conversion_table = synObject.get(files[50]['id'])
    conversion_table_df = pd.read_excel(conversion_table.path, header=None)
    # keep the part of column 1 before the first "_" (intended to drop the
    # trailing _T suffix)
    conversion_table_df[2] = conversion_table_df[1].str.rsplit("_", expand=True)[0]
    # literal "." -> "_"; regex=False so "." is never treated as a wildcard,
    # whatever the installed pandas default is
    conversion_table_df[3] = conversion_table_df[2].str.replace(".", "_", regex=False)

    # one frame per drug file, concatenated once at the end
    per_drug_frames = []
    # NOTE(review): the last four files in the listing are skipped, presumably
    # because they are not per-drug tables -- confirm against the folder.
    for file_info in files[:max(len(files) - 4, 0)]:
        drug_table_syn = synObject.get(file_info['id'])
        drug_table = pd.read_csv(drug_table_syn.path, sep="\t")

        # melt: first column is the dose, every other column is one sample
        melted = drug_table.melt(
            id_vars='Unnamed: 0',
            value_vars=drug_table.columns[1:],
            var_name="sample",
        )
        melted['linkID'] = melted['sample'].str.split(".", expand=True)[0]

        # link to conversion table
        drugdata = melted.merge(
            conversion_table_df, left_on='linkID', right_on=0, how='left'
        )[['Unnamed: 0', 'value', 3]]

        # link to sample sheet (inner join: unmapped samples are dropped)
        with_sample = drugdata.merge(samples, left_on=3, right_on='common_name')
        with_sample = with_sample[['Unnamed: 0', 'value', 'improve_sample_id']]
        with_sample = with_sample.rename(
            {"Unnamed: 0": "DOSE", 'value': 'GROWTH'}, axis=1
        )

        # drug name is the text between ")" and the next "(" / "." in the
        # file name, lower-cased
        with_sample['chem_name'] = (
            file_info['name'].split(")")[1].split("(")[0].split(".")[0].strip().lower()
        )

        # link to drug table (joins on the shared `chem_name` column)
        with_drug = with_sample.merge(drugs[['improve_drug_id', 'chem_name']], how='left')
        final_drugdata = with_drug[['DOSE', 'GROWTH', 'improve_sample_id', 'improve_drug_id']]
        final_drugdata = final_drugdata.rename({'improve_drug_id': "Drug"}, axis=1)
        final_drugdata['study'] = "Lee etal 2018 Bladder PDOs"
        final_drugdata['source'] = "Synapse"
        final_drugdata['time'] = 6
        final_drugdata['time_unit'] = 'days'

        # append to the running collection so every drug is kept
        per_drug_frames.append(final_drugdata)

    if not per_drug_frames:
        return pd.DataFrame(columns=output_columns)
    return pd.concat(per_drug_frames, ignore_index=True)
53+
54+
55+
if __name__ == "__main__":
    # argparse is used below but is not among this file's top-level imports.
    import argparse

    # Command-line entry point: log into Synapse, read the sample and drug
    # mapping files, build the dose-response table and write it as TSV.
    parser = argparse.ArgumentParser()
    # '-t' and '--token' must be separate arguments; without the comma the two
    # literals concatenate into the single flag '-t--token' and `args.token`
    # is never defined.
    parser.add_argument('-t', '--token', help='Synapse authentication token')
    parser.add_argument('-s', '--curSampleFile', help='Sample mapping file for bladder pdo samples')
    parser.add_argument('-d', '--drugfile', help='Drug mapping file for bladder pdo samples')
    parser.add_argument('-o', '--output', default = '/tmp/bladderpdo_doserep.tsv',help='Output file to be read into curve fitting code')

    args = parser.parse_args()
    print("Logging into Synapse")
    PAT = args.token
    synObject = synapseclient.login(authToken=PAT)
    # drug file is tab-separated; sample file is comma-separated
    drug_df = pd.read_csv(args.drugfile, sep='\t')
    samples_df = pd.read_csv(args.curSampleFile)

    doseresponse_data = get_bladder_pdo_experiments(synObject, samples_df, drug_df)
    doseresponse_data.to_csv(args.output, sep='\t')
71+

build/bladderpdo/build_drugs.sh

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,10 @@ trap 'echo "Error on or near line $LINENO while executing: $BASH_COMMAND"; exit
55

66
echo "Running script with token and drugFile $1"
77
# for running locally (from build directory):
8-
python3 -m bladderpdo.02_createBladderPDODrugsFile --token $SYNAPSE_AUTH_TOKEN -d $1 -o ./bladderpdo/bladderpdo_drugs.tsv
9-
#python3 02_createBladderPDODrugsFile.py --token $SYNAPSE_AUTH_TOKEN -d $1 -o /tmp/bladderpdo_drugs.tsv
8+
#python3 -m bladderpdo.02_createBladderPDODrugsFile --token $SYNAPSE_AUTH_TOKEN -d $1 -o ./bladderpdo/bladderpdo_drugs.tsv
9+
python3 02_createBladderPDODrugsFile.py --token $SYNAPSE_AUTH_TOKEN -d $1 -o /tmp/bladderpdo_drugs.tsv
1010

1111
echo "Running build_drug_desc.py..."
1212
#for running locally:
13-
python3 utils/build_drug_desc.py --drugtable ./bladderpdo/bladderpdo_drugs.tsv --desctable ./bladderpdo/bladderpdo_drug_descriptors.tsv.gz
14-
#python3 build_drug_desc.py --drugtable /tmp/bladderpdo_drugs.tsv --desctable /tmp/bladderpdo_drug_descriptors.tsv.gz
13+
#python3 utils/build_drug_desc.py --drugtable ./bladderpdo/bladderpdo_drugs.tsv --desctable ./bladderpdo/bladderpdo_drug_descriptors.tsv.gz
14+
python3 build_drug_desc.py --drugtable /tmp/bladderpdo_drugs.tsv --desctable /tmp/bladderpdo_drug_descriptors.tsv.gz

build/bladderpdo/build_exp.sh

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
#!/bin/bash
set -euo pipefail

trap 'echo "Error on or near line $LINENO while executing: $BASH_COMMAND"; exit 1' ERR

# Usage: build_exp.sh <curSampleFile> <drugfile>
# SYNAPSE_AUTH_TOKEN must be set in the environment (set -u aborts otherwise).

echo "Running 03_createBladderPDOExperimentFile.py with drugfile $2 and curSampleFile $1"
# Quote expansions so paths or tokens with spaces/globs are passed intact.
python 03_createBladderPDOExperimentFile.py --token "$SYNAPSE_AUTH_TOKEN" --drugfile "$2" --curSampleFile "$1" --output /tmp/bladderpdo_doserep.tsv

# NOTE(review): --input and --output are the same path; confirm fit_curve.py
# does not overwrite its input before it has finished reading it.
python fit_curve.py --input /tmp/bladderpdo_doserep.tsv --output /tmp/bladderpdo_doserep.tsv

build/bladderpdo/build_omics.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@ trap 'echo "Error on or near line $LINENO while executing: $BASH_COMMAND"; exit
55

66
echo "Running script with token, curSamples $2, and genes $1."
77
# for mutation data (-m)
8-
#python3 01_createBladderPDOOmicsFiles.py --token $SYNAPSE_AUTH_TOKEN -s $2 -g $1 -m
8+
python3 01_createBladderPDOOmicsFiles.py --token $SYNAPSE_AUTH_TOKEN -s $2 -g $1 -m
99
# for expressiondata (-e)
10-
#python3 01_createBladderPDOOmicsFiles.py --token $SYNAPSE_AUTH_TOKEN -s $2 -g $1 -e
10+
python3 01_createBladderPDOOmicsFiles.py --token $SYNAPSE_AUTH_TOKEN -s $2 -g $1 -e
1111
# for copynumber
1212
python3 01_createBladderPDOOmicsFiles.py --token $SYNAPSE_AUTH_TOKEN -s $2 -g $1 -c

build/bladderpdo/requirements.R

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
install.packages("BiocManager")
2+
BiocManager::install("GEOquery",update=TRUE,ask=FALSE)
3+
BiocManager::install("GenomicRanges",update=TRUE,ask=FALSE)
4+
BiocManager::install("Homo.sapiens",update=TRUE,ask=FALSE)

build/bladderpdo/requirements.txt

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
pandas
2+
synapseclient
3+
argparse
4+
numpy==1.26.4
5+
matplotlib
6+
scikit-learn
7+
mordredcommunity
8+
rdkit
9+
wget
10+
gzip
11+
subprocess
12+
math
13+
tqdm
14+
itertools
15+
scipy
16+
multiprocessing

build/build_dataset.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,8 @@ def process_docker(dataset,validate):
4545
'pancpdo': ['pancpdo'],
4646
'cptac': ['cptac'],
4747
'genes': ['genes'],
48-
'upload': ['upload']
48+
'upload': ['upload'],
49+
'bladderpdo': ['bladderpdo']
4950
}
5051

5152
# Collect container names to build based on the dataset provided. Always build 'genes'.
@@ -125,7 +126,8 @@ def process_omics(executor, dataset, should_continue):
125126
'cptac': ['copy_number', 'mutations', 'proteomics', 'transcriptomics'],
126127
'hcmi': ['mutations', 'transcriptomics'],
127128
'pancpdo': ['transcriptomics'],
128-
'mpnstpdx':['copy_number', 'mutations', 'proteomics', 'transcriptomics']
129+
'mpnstpdx':['copy_number', 'mutations', 'proteomics', 'transcriptomics'],
130+
'bladderpdo': ['copy_number', 'mutations', 'transcriptomics']
129131
}
130132

131133
expected_omics = dataset_omics_files.get(dataset, [])

build/docker/Dockerfile.bladderpdo

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
FROM r-base:4.4.1
2+
ENV DEBIAN_FRONTEND=noninteractive
3+
RUN apt-get update --fix-missing
4+
#RUN apt-get install -y --fix-missing --allow-unauthenticated build-essential libpq-dev python3.10 python3-pip python3-setuptools python3-dev python3-venv libcurl4-openssl-dev libxml2-dev libglpk-dev
5+
6+
# RUN apt-get install -y --fix-missing --allow-unauthenticated build-essential python3-pip python3-setuptools python3-dev python3-venv libcurl4-openssl-dev libglpk-dev libxml2-dev libpq-dev
7+
8+
RUN apt-get install -y --fix-missing --allow-unauthenticated \
9+
build-essential \
10+
python3-pip \
11+
python3-setuptools \
12+
python3-dev \
13+
python3-venv \
14+
libcurl4-openssl-dev \
15+
libglpk-dev \
16+
libxml2-dev \
17+
libpq-dev \
18+
ca-certificates
19+
20+
RUN python3 -m venv /opt/venv
21+
RUN /opt/venv/bin/pip3 install --upgrade pip
22+
23+
24+
# Set MPLCONFIGDIR to a writable directory
25+
ENV MPLCONFIGDIR=/app/tmp/matplotlib
26+
RUN mkdir -p /app/tmp/matplotlib
27+
28+
29+
ENV PYTHONPATH "${PYTHONPATH}:/app"
30+
WORKDIR /app
31+
32+
ADD build/broad_sanger/obtainGSMidLink.R.R ./
33+
ADD build/broad_sanger/CNV-segfile-annotation.R ./
34+
ADD build/broad_sanger/*py ./
35+
ADD build/broad_sanger/*sh ./
36+
37+
ADD build/utils/* ./
38+
39+
ADD build/bladderpdo/requirements.txt .
40+
ADD build/bladderpdo/exp_requirements.r .
41+
42+
# installing r libraries
43+
RUN Rscript exp_requirements.r
44+
45+
# installing python libraries
46+
RUN /opt/venv/bin/pip3 install -r requirements.txt

build/docker/docker-compose.yml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ services:
6161
HTTPS_PROXY: ${HTTPS_PROXY}
6262
platform: linux/amd64
6363
image: mpnstpdx:latest
64+
6465
cptac:
6566
build:
6667
context: ../../
@@ -70,6 +71,15 @@ services:
7071
platform: linux/amd64
7172
image: cptac:latest
7273

74+
bladderpdo:
75+
build:
76+
context: ../../
77+
dockerfile: build/docker/Dockerfile.bladderpdo
78+
args:
79+
HTTPS_PROXY: ${HTTPS_PROXY}
80+
platform: linux/amd64
81+
image: bladderpdo:latest
82+
7383
genes:
7484
build:
7585
context: ../../

0 commit comments

Comments
 (0)