Skip to content

Commit 8e0f415

Browse files
authored
Merge pull request #183 from PNNL-CompBio/sample-schema-fix
Sample schema fix
2 parents 4ccfce0 + 2a0e871 commit 8e0f415

File tree

5 files changed

+9
-6
lines changed

5 files changed

+9
-6
lines changed

build/beatAML/GetBeatAML.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,9 +131,10 @@ def generate_samples_file(prev_samples_path):
131131
prot_samples.rename(columns={"specimenType": "common_name"}, inplace=True)
132132
prot_samples["cancer_type"] = "Acute Myeloid Leukaemia"
133133
prot_samples["model_type"] = "ex vivo"
134-
prot_samples["other_id_source"] = "beatAML"
134+
prot_samples["other_id_source"] = "beatAML"
135135

136136
all_samples = pd.concat([prot_samples, full_samples])
137+
all_samples['species'] = 'Homo sapiens'
137138
maxval = max(pd.read_csv(prev_samples_path).improve_sample_id)
138139
mapping = {labId: i for i, labId in enumerate(all_samples['other_id'].unique(), start=(int(maxval)+1))}
139140
all_samples['improve_sample_id'] = all_samples['other_id'].map(mapping)

build/hcmi/01-createHCMISamplesFile.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ def align_to_linkml_schema(input_df):
4343
}
4444

4545
# Apply mapping
46+
input_df['species'] = 'Homo sapiens' ##i assume they're all human?
4647
input_df['model_type'] = input_df['model_type'].map(mapping_dict)
4748
input_df.dropna(subset=['model_type'], inplace=True)
4849

build/mpnst/00_sample_gen.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,13 @@ manifest<-synapser::synTableQuery("select * from syn53503360")$asDataFrame()
3131
##first create samples for the original tumors
3232
tumorTable<-manifest|>
3333
dplyr::select(common_name='Sample')|>
34-
dplyr::mutate(other_id_source='NF Data Portal',cancer_type="Malignant peripheral nerve sheath tumor",species='Human',model_type='tumor')|>
34+
dplyr::mutate(other_id_source='NF Data Portal',other_names='',cancer_type="Malignant peripheral nerve sheath tumor",species='Human',model_type='tumor')|>
3535
tidyr::unite(col='other_id',c('common_name','model_type'),sep=' ',remove=FALSE)
3636

3737
##then create samples for the PDX
3838
sampTable<-manifest|>
3939
dplyr::select(c(common_name='Sample',MicroTissueDrugFolder))|>
40-
dplyr::mutate(other_id_source='NF Data Portal',cancer_type="Malignant peripheral nerve sheath tumor",species='Human',model_type='patient derived xenograft')|>
40+
dplyr::mutate(other_id_source='NF Data Portal',other_names='',cancer_type="Malignant peripheral nerve sheath tumor",species='Human',model_type='patient derived xenograft')|>
4141
tidyr::unite(col='other_id',c('common_name','model_type'),sep=' ',remove=FALSE)
4242

4343

build/mpnst/01_mpnst_get_omics.R

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,8 @@ cnv<-do.call(rbind,lapply(setdiff(combined$CopyNumber,NA),function(x){
157157
subset(!is.na(entrez_id))|>
158158
dplyr::select(entrez_id,log2)|>
159159
dplyr::distinct()|>
160-
dplyr::mutate(copy_number=2^log2)
160+
dplyr::mutate(copy_number=2^log2)|>
161+
dplyr::select(-log2)
161162

162163
res<-long_df|> ##deep del < 0.5210507 < het loss < 0.7311832 < diploid < 1.214125 < gain < 1.422233 < amp
163164
dplyr::mutate(copy_call=ifelse(copy_number<0.5210507,'deep del',

build/utils/fit_curve.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -198,8 +198,8 @@ def main():
198198
fname = args.output or 'combined_single_response_agg'
199199
process_df_part(df_all, fname, beataml=args.beataml)#, start=args.start, count=args.count)
200200

201-
if args.beataml == False:
202-
format_coderd_schema(fname+'.0')
201+
# if args.beataml == False:
202+
format_coderd_schema(fname+'.0')
203203

204204
if __name__ == '__main__':
205205
main()

0 commit comments

Comments
 (0)