Skip to content

Commit 0c724de

Browse files
committed
added discretized copynumber x_data
1 parent d63a378 commit 0c724de

File tree

1 file changed

+46
-2
lines changed

1 file changed

+46
-2
lines changed

scripts/prepare_data_for_improve.py

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -282,10 +282,18 @@ def process_datasets(args):
282282

283283

284284
#-------------------------------------------------------------------
285-
# create copynumber master table
285+
# create copynumber master table & discretized table
286286
#-------------------------------------------------------------------
287287

288288
merged_copy_number = merge_master_tables(args, data_sets=data_sets, data_type='copy_number')
289+
merged_copy_number.fillna(1, inplace=True)
290+
291+
discretized_copy_number = merged_copy_number.apply(
292+
pd.cut,
293+
bins = [0, 0.5210507, 0.7311832, 1.214125, 1.422233, 2],
294+
labels = [-2, -1, 0, 1, 2],
295+
include_lowest=True
296+
)
289297

290298
merged_copy_number = pd.merge(
291299
merged_copy_number,
@@ -316,7 +324,43 @@ def process_datasets(args):
316324
"cancer_copy_number.tsv"
317325
)
318326
(merged_copy_number
319-
.fillna(1)
327+
.transpose()
328+
.to_csv(
329+
path_or_buf=outfile_path,
330+
sep='\t',
331+
header=False
332+
)
333+
)
334+
335+
discretized_copy_number = pd.merge(
336+
discretized_copy_number,
337+
data_gene_names[[
338+
'entrez_id',
339+
'ensemble_gene_id',
340+
'gene_symbol'
341+
]],
342+
how='left',
343+
on='entrez_id',
344+
)
345+
346+
discretized_copy_number.insert(
347+
1,
348+
'ensemble_gene_id',
349+
discretized_copy_number.pop('ensemble_gene_id')
350+
)
351+
discretized_copy_number.insert(
352+
1,
353+
'gene_symbol',
354+
discretized_copy_number.pop('gene_symbol')
355+
)
356+
357+
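# writing the discretized copy number datatable to '/x_data/cancer_discretized_copy_number.tsv.tsv'
# NOTE(review): the doubled '.tsv.tsv' extension in the filename below looks unintended -- confirm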
358+
outfile_path = args.WORKDIR.joinpath(
359+
"data_out",
360+
"x_data",
361+
"cancer_discretized_copy_number.tsv.tsv"
362+
)
363+
(discretized_copy_number
320364
.transpose()
321365
.to_csv(
322366
path_or_buf=outfile_path,

0 commit comments

Comments
 (0)