Skip to content

Commit 8abb501

Browse files
committed
updated with more testing updates
how did other things break?
1 parent 4184b06 commit 8abb501

File tree

8 files changed

+33
-26
lines changed

8 files changed

+33
-26
lines changed

build/beatAML/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,4 @@ requests
44
synapseclient
55
argparse
66
numpy
7+
openpyxl

build/broad_sanger/01-broadSangerSamples.R

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
library(curl)
99
library(dplyr)
1010
library(readr)
11+
library(XML)
1112

1213
##the only thing that Priyanka has here is TRP identifiers, so collecting those
1314
# tab<-read.table('DepMap_Argonne_Mapping.csv',sep=',',header=T)%>%
@@ -31,34 +32,37 @@ print(paste("Downloaded",nrow(depmap_models),'dep map identifiers and',nrow(sang
3132

3233
##query for cellosaurus automagically to get latest version
3334
url='https://ftp.expasy.org/databases/cellosaurus/cellosaurus.xml'
34-
if(!file.exists('cell.xml'))
35-
curl_download(url,'cell.xml',quiet=TRUE)#curl(url, "r", h)
36-
cello<-XML::xmlParse('cell.xml')
35+
if(!file.exists('/tmp/cell.xml'))
36+
curl_download(url,'/tmp/cell.xml',quiet=TRUE)#curl(url, "r", h)
37+
cello<-XML::xmlParse('/tmp/cell.xml')
3738
cdf<-XML::xmlToList(cello)
3839

39-
40+
print('Got all cellosaurus ids')
4041
### next we parse through cellosaurus to get as many samples as we deem relevant
4142
##ok, this command seems to have gotten file in appropriate state
4243
cell.lines<-lapply(cdf$`cell-line-list`, function(x) unlist(x))
4344

4445
##now we need to extract columns
4546
options(show.error.messages=TRUE)
4647
full.res<-do.call(rbind,lapply(cell.lines,function(x){
47-
##create a data frame for each cell lines
48-
x<-unlist(x)
49-
#should only be one acession
50-
acc<-x[grep('accession.text',names(x),fixed=T)]%>%unlist()
51-
52-
cn<-x[grep('name.text',names(x),fixed=T)]%>%unlist()
53-
#these will fail if no key found
54-
spec<-x[grep("species-list.cv-term.text",names(x),fixed=T)]%>%unlist()
55-
#dis<-x[grep("disease-list.cv-term.text",names(x),fixed=T)]%>%unlist()
56-
data.frame(accession=cn,
57-
RRID=rep(acc,length(cn)),
58-
species=rep(spec,length(cn)))
59-
# disease=rep(dis,length(cn)))
48+
##create a data frame for each cell lines
49+
x<-unlist(x)
50+
#should only be one acession
51+
acc<-x[grep('accession.text',names(x),fixed=T)]%>%unlist()
52+
53+
cn<-x[grep('name-list.name.text',names(x),fixed=T)]%>%unlist()
54+
#these will fail if no key found
55+
spec<-x[grep("species-list.xref.label",names(x),fixed=T)]%>%unlist()
56+
#print(acc)
57+
#print(cn)
58+
#print(spec)
59+
#dis<-x[grep("disease-list.cv-term.text",names(x),fixed=T)]%>%unlist()
60+
data.frame(accession=cn,
61+
RRID=rep(acc,length(cn)),
62+
species=rep(spec,length(cn)))
63+
# disease=rep(dis,length(cn)))
6064
}))%>%
61-
subset(species=='Homo sapiens (Human)')
65+
subset(species=='Homo sapiens (Human)')
6266

6367

6468
print(paste('Got',nrow(full.res),'human cellosaurus samples'))

build/broad_sanger/03a-nci60Drugs.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,10 +51,10 @@ def main():
5151
arr = set(pubchems['CID'])
5252

5353
print("Querying pubchem from CIDs")
54-
pr.update_dataframe_and_write_tsv(arr,batch_size=400,isname=False)
54+
pr.update_dataframe_and_write_tsv(arr,opts.output,'/tmp/ignore_chems.txt',batch_size=400,isname=False)
5555

5656
##then make sure to paste `nsc` in front of all nsc ids
57-
res = pl.read_csv('drugs.tsv',separator='\t')
57+
res = pl.read_csv(opts.output,separator='\t')
5858

5959

6060
nsc = list(pubchems.filter(pl.col('CID').is_in(list(res['pubchem_id'])))['NSC'])

build/broad_sanger/build_drugs.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
/opt/env/bin/python 03a-nci60Drugs.py
1+
/opt/venv/bin/python 03a-nci60Drugs.py
22
Rscript 03-createDrugFile.R CTRPv2,GDSC,gCSI,PRISM,CCLE,FIMM

build/broad_sanger/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,4 @@ scikit-learn
77
scipy
88
requests
99
openpyxl
10+
polars

build/build_all.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ def main():
3636
parser.add_argument('--drugs',dest='drugs',default=False,action='store_true')
3737
parser.add_argument('--exp',dest='exp',default=False,action='store_true')
3838
parser.add_argument('--all',dest='all',default=False,action='store_true')
39-
parser.add_argument('--dataset',dest='datasets',default='broad_sanger,cptac,hcmi,beataml,mpnst',help='Datasets to process. Defaults to all available, but if there are synapse issues, please remove beataml and mpnst')
39+
parser.add_argument('--dataset',dest='datasets',default='broad_sanger,hcmi,beataml,mpnst,cptac',help='Datasets to process. Defaults to all available, but if there are synapse issues, please remove beataml and mpnst')
4040

4141
args = parser.parse_args()
4242

@@ -72,7 +72,8 @@ def main():
7272
## can be run independently but first before omics/experiments
7373
if args.samples or args.all:
7474
### build gene file
75-
run_cmd(['genes','sh','build_genes.sh'],'gene file')
75+
if not os.path.exists('/tmp/genes.csv'):
76+
run_cmd(['genes','sh','build_genes.sh'],'gene file')
7677

7778
###build sample files
7879
sf=''
@@ -101,7 +102,7 @@ def main():
101102

102103
if not os.path.exists('local/'+da+'_drugs.tsv'):
103104
run_cmd([di,'sh','build_drugs.sh',df],da+' drugs')
104-
df = '/tmp/'+di+'_drugs.tsv'
105+
df = '/tmp/'+da+'_drugs.tsv'
105106

106107
#### Any new omics files are created here.
107108
## depends on samples!

build/docker/Dockerfile.broad_sanger_exp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ WORKDIR /app
1313

1414
ADD build/broad_sanger/03-createDrugFile.R ./
1515
ADD build/broad_sanger/04a-drugResponseData.R ./
16-
ADD build/broad_sanger/04-drug_dosage_and_curves.py ./
16+
ADD build/broad_sanger/*py ./
1717
ADD build/broad_sanger/build_drugs.sh ./
1818
ADD build/broad_sanger/build_exp.sh ./
1919
ADD build/utils/* ./

build/utils/pubchem_retrieval.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ def timeout_handler(signum, frame):
150150
should_continue = False
151151

152152
# Call this function from other scripts.
153-
def update_dataframe_and_write_tsv(unique_names, output_filename="drugs.tsv",ignore_chems="ignore_chems.txt", batch_size=1,isname=False):
153+
def update_dataframe_and_write_tsv(unique_names, output_filename="drugs.tsv",ignore_chems="ignore_chems.txt", batch_size=1,isname=True):
154154
global should_continue, existing_synonyms, existing_pubchemids
155155
time_limit=5*60*60 # 5 hours
156156
signal.signal(signal.SIGALRM, timeout_handler)

0 commit comments

Comments
 (0)