@@ -282,10 +282,18 @@ def process_datasets(args):
282282
283283
284284 #-------------------------------------------------------------------
285- # create copynumber master table
285+ # create copynumber master table & discretized table
286286 #-------------------------------------------------------------------
287287
288288 merged_copy_number = merge_master_tables (args , data_sets = data_sets , data_type = 'copy_number' )
289+ merged_copy_number .fillna (1 , inplace = True )
290+
291+ discretized_copy_number = merged_copy_number .apply (
292+ pd .cut ,
293+ bins = [0 , 0.5210507 , 0.7311832 , 1.214125 , 1.422233 , 2 ],
294+ labels = [- 2 , - 1 , 0 , 1 , 2 ],
295+ include_lowest = True
296+ )
289297
290298 merged_copy_number = pd .merge (
291299 merged_copy_number ,
@@ -316,7 +324,43 @@ def process_datasets(args):
316324 "cancer_copy_number.tsv"
317325 )
318326 (merged_copy_number
319- .fillna (1 )
327+ .transpose ()
328+ .to_csv (
329+ path_or_buf = outfile_path ,
330+ sep = '\t ' ,
331+ header = False
332+ )
333+ )
334+
335+ discretized_copy_number = pd .merge (
336+ discretized_copy_number ,
337+ data_gene_names [[
338+ 'entrez_id' ,
339+ 'ensemble_gene_id' ,
340+ 'gene_symbol'
341+ ]],
342+ how = 'left' ,
343+ on = 'entrez_id' ,
344+ )
345+
346+ discretized_copy_number .insert (
347+ 1 ,
348+ 'ensemble_gene_id' ,
349+ discretized_copy_number .pop ('ensemble_gene_id' )
350+ )
351+ discretized_copy_number .insert (
352+ 1 ,
353+ 'gene_symbol' ,
354+ discretized_copy_number .pop ('gene_symbol' )
355+ )
356+
357+ # writing the discretized copy number datatable to the 'data_out/x_data/' directory
358+ outfile_path = args .WORKDIR .joinpath (
359+ "data_out" ,
360+ "x_data" ,
361+ "cancer_discretized_copy_number.tsv.tsv"
362+ )
363+ (discretized_copy_number
320364 .transpose ()
321365 .to_csv (
322366 path_or_buf = outfile_path ,
0 commit comments