Skip to content

Commit 58fd4f9

Browse files
committed
Pulled updates from main to this branch
2 parents db46032 + 8e0f415 commit 58fd4f9

File tree

8 files changed

+28
-8
lines changed

8 files changed

+28
-8
lines changed

README.md

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,15 @@ please see the [schema description](schema/README.md).
2828

2929
We have created a build script that executes each step of the build process to enable the creation of a `local` folder with all the requisite folders.
3030

31-
The build requires Python as well as Docker to be installed.
31+
The build requires Python as well as Docker to be installed. To access
32+
the data on Synapse (MPNST, BeatAML proteomics), you will need to
33+
[register for a Synapse account](http://synapse.org/register) and then
34+
request access to the [CoderData Build
35+
Team](https://www.synapse.org/#!Team:3503472). Then you will need to
36+
create a [personal access
37+
token](https://www.synapse.org/#!PersonalAccessTokens:) with Download
38+
access and then set the `SYNAPSE_AUTH_TOKEN` environment variable to
39+
that token.
3240

3341
To build the docker images and run them, simply run (though this will take a while!):
3442
```

build/beatAML/GetBeatAML.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,9 +131,10 @@ def generate_samples_file(prev_samples_path):
131131
prot_samples.rename(columns={"specimenType": "common_name"}, inplace=True)
132132
prot_samples["cancer_type"] = "Acute Myeloid Leukaemia"
133133
prot_samples["model_type"] = "ex vivo"
134-
prot_samples["other_id_source"] = "beatAML"
134+
prot_samples["other_id_source"] = "beatAML"
135135

136136
all_samples = pd.concat([prot_samples, full_samples])
137+
all_samples['species'] = 'Homo sapiens'
137138
maxval = max(pd.read_csv(prev_samples_path).improve_sample_id)
138139
mapping = {labId: i for i, labId in enumerate(all_samples['other_id'].unique(), start=(int(maxval)+1))}
139140
all_samples['improve_sample_id'] = all_samples['other_id'].map(mapping)

build/broad_sanger/02-broadSangerOmics.R

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,15 @@ sanger_files<-function(fi,value){
105105
rm(exp_file)
106106

107107
print('copy call')
108+
109+
##rename SANGER value
110+
# Amplification -> amp
111+
# Deletion -> deep del
112+
# Loss -> het loss
113+
# Gain -> gain
114+
# Neutral -> diploid
115+
#
116+
res$Sanger=sapply(res$Sanger,function(x) ifelse(x=='Amplification','amp',ifelse(x=='Deletion','deep del',ifelse(x=='Loss','het loss',ifelse(x=='Gain','gain','diploid')))))
108117
##calibrate the copy call
109118
res<-res|> ##deep del < 0.5210507 < het loss < 0.7311832 < diploid < 1.214125 < gain < 1.422233 < amp
110119
dplyr::mutate(IMPROVE=ifelse(copy_number<0.5210507,'deep del',

build/hcmi/01-createHCMISamplesFile.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ def align_to_linkml_schema(input_df):
4343
}
4444

4545
# Apply mapping
46+
input_df['species'] = 'Homo sapiens' ##i assume they're all human?
4647
input_df['model_type'] = input_df['model_type'].map(mapping_dict)
4748
input_df.dropna(subset=['model_type'], inplace=True)
4849

build/mpnst/00_sample_gen.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,13 @@ manifest<-synapser::synTableQuery("select * from syn53503360")$asDataFrame()
3131
##first create samples for the original tumors
3232
tumorTable<-manifest|>
3333
dplyr::select(common_name='Sample')|>
34-
dplyr::mutate(other_id_source='NF Data Portal',cancer_type="Malignant peripheral nerve sheath tumor",species='Human',model_type='tumor')|>
34+
dplyr::mutate(other_id_source='NF Data Portal',other_names='',cancer_type="Malignant peripheral nerve sheath tumor",species='Human',model_type='tumor')|>
3535
tidyr::unite(col='other_id',c('common_name','model_type'),sep=' ',remove=FALSE)
3636

3737
##then create samples for the PDX
3838
sampTable<-manifest|>
3939
dplyr::select(c(common_name='Sample',MicroTissueDrugFolder))|>
40-
dplyr::mutate(other_id_source='NF Data Portal',cancer_type="Malignant peripheral nerve sheath tumor",species='Human',model_type='patient derived xenograft')|>
40+
dplyr::mutate(other_id_source='NF Data Portal',other_names='',cancer_type="Malignant peripheral nerve sheath tumor",species='Human',model_type='patient derived xenograft')|>
4141
tidyr::unite(col='other_id',c('common_name','model_type'),sep=' ',remove=FALSE)
4242

4343

build/mpnst/01_mpnst_get_omics.R

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,8 @@ cnv<-do.call(rbind,lapply(setdiff(combined$CopyNumber,NA),function(x){
157157
subset(!is.na(entrez_id))|>
158158
dplyr::select(entrez_id,log2)|>
159159
dplyr::distinct()|>
160-
dplyr::mutate(copy_number=2^log2)
160+
dplyr::mutate(copy_number=2^log2)|>
161+
dplyr::select(-log2)
161162

162163
res<-long_df|> ##deep del < 0.5210507 < het loss < 0.7311832 < diploid < 1.214125 < gain < 1.422233 < amp
163164
dplyr::mutate(copy_call=ifelse(copy_number<0.5210507,'deep del',

build/mpnst/02_get_drug_data.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ print(paste(alldrugs,collapse=','))
7272

7373

7474
##copy old drug to new drug
75-
olddrugs<-do.call(rbind,lapply(unique(unlist(strsplit(olddrugfiles,split=','))),function(x) read.table(x,header=T,sep='\t',quote='',comment.char=''))
75+
olddrugs<-do.call(rbind,lapply(unique(unlist(strsplit(olddrugfiles,split=','))),function(x) read.table(x,header=T,sep='\t',quote='',comment.char='')))
7676
olddrugs<-unique(olddrugs)
7777

7878
print(paste('Read in ',nrow(olddrugs),'old drugs'))

build/utils/fit_curve.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -198,8 +198,8 @@ def main():
198198
fname = args.output or 'combined_single_response_agg'
199199
process_df_part(df_all, fname, beataml=args.beataml)#, start=args.start, count=args.count)
200200

201-
if args.beataml == False:
202-
format_coderd_schema(fname+'.0')
201+
# if args.beataml == False:
202+
format_coderd_schema(fname+'.0')
203203

204204
if __name__ == '__main__':
205205
main()

0 commit comments

Comments
 (0)