-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvisualisation.R
401 lines (360 loc) · 18.9 KB
/
visualisation.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
library(dplyr) # Version 0.7.0 or above
library(reshape2)
library(RColorBrewer)
library(igraph)
library(ggplot2)
library(tidyr)
#------Visualisation Initialisation------
edge_width = 3.5 #thickness of edges
arrow_size = 0.5 #size of arrow heads
arrow_width = 3 #width of arrow head
label_dist = 0 #distance of edge label along arrow
legend_cex = 2.7 #size of text in legends
legend_point_size = 4 #size of symbol in legend for phenotype vertices
legend_line_size = 6 #size of symbol in legend for edges
edge_label_size = 2.1 #size of text in edge labels
#------Functions------
blue.pal <- brewer.pal(n = 7, "Blues")
red.pal <- brewer.pal(n = 7, "YlOrRd")
edge.pal <- brewer.pal(8, "Dark2")[3:8] #remove first colour as green is too confusing
Edge.col <- function(edge.list, pal = edge.pal) {
# Add colours to edges based on node which is mutated on traversing that edge
#
# Args:
# edge.list: List of edges with columns from, to, and change with the last noting the node which mutated and the former being the vertices
# pal: palette from presets for RColorBrewer to use
#
# Returns: list of colours to apply to edges, to be used for edge.color in igraph::plot
edge.index <- as.factor(edge.list$change)
edge.pal <- pal
edge.col <- edge.pal[edge.index]
return(edge.col)
}
to.dev <- function(expr, dev, filename, ..., verbose=TRUE) {
# From https://github.com/dbarneche/2014-07-14-Dalhousie/
# Takes a plotting function and outputs the plot. Can do any output type: png, svg, pdf
# Args:
# expr: A function that plots something, which alone evaluates within R
# dev: Type of output e.g. pdf, no need for quotes
# filename: name under which to save file
# ... : takes arguments for dev e.g. width and height
# Returns:
# Status of output
if ( verbose )
cat(sprintf("Creating %s\n", filename))
dev(filename, ...)
on.exit(dev.off())
eval.parent(substitute(expr))
}
make.verts.table <- function(summary.line) {
# Make a unique vertex id in the correct format for `igraph::graph_from_data_frame`
#
# Args:
# List of dataframes with the node info for each cell line e.g. `expanded.summary.by.mutation.attractor`
# Returns:
# Same list of dataframes with FIRST column being the VertID
verts.table <- mutate(summary.line, VertID = paste(Mutation, Attractor, sep = "_AT_")) %>%
select(VertID, everything()) #put id column first, req for igraph::graph_from_data_frame
return(verts.table)
}
make.edge.list <- function(verts){
# Dependencies: library(dplyr) of version 0.7.0 or later
# Added mutation level as well as node to `Change`
#
# Take a list of nodes and make all the edges for a powerset
#
# Args: node.table: Data.frame with:
## unique node id: VertID, a column for every netw Node,
## Column for all BMA nodes mutated Node1, Node2, etc
#
# Returns:
# List of edges for the visualisation, as a edge list not an adjacency matrix
node.cols <- verts %>%
select(contains("Node")) %>%
colnames # makes a vector of all the Node columns
mut.cols <- verts %>%
select(contains("Mut")) %>%
select(-Mutation) %>%
select(-Number.Muts) %>%
colnames # makes a vector of all the Node columns
#Bind together Node and Mut for reporting as final overall change
for( i in 1:length(node.cols)){
verts <- unite(verts, !!paste0("NodeMut", i), node.cols[i], mut.cols[i], remove = FALSE, sep = "@") #!! is to enquote paste for unite in dplyr fashion
}
nodemut.cols <- verts %>%
select(contains("NodeMut")) %>%
colnames
verts <- verts %>% unite(NodeMuts, nodemut.cols, remove = FALSE, sep = "_") # Unite the Nodes into one string (but preserve dropping of the Attractor component from VertID)
set <- verts[["NodeMuts"]] %>% as.list
names(set) <- verts[["VertID"]] # Convert to names list for use of set functions of `dplyr`
set <- lapply(set, strsplit, split = "_") # Split nodes into a vector of nodes
set <- lapply(set, function(x) {x %>% unlist %>% setdiff("NA@NA")}) # Remove `NA`
set <- set[order(sapply(set, length), decreasing=FALSE)] # Put in order of number of mutations from least to most
edge.list <- data.frame(from = character(), to = character(), change = character()) # Initialise
# Look through pairs of mutatations, from top to bottom as arranged in ascending order of length
# If the number of mutations of the latter is 1 greater than the former, consider for edge. Otherwise we would be losing a mutation e.g. going from A to B necesitates losing A, which we assume does not occur (losing a gene counts as a new mutation)
# Then check for intersection, only allow edge is the intersection contains all old genes, so cannot go from A to BC, as this again entails losing mutation A.
for(i in 1:length(set)){
for (j in i:length(set)){
# Check if only one node more between vertices
if(length(set[[i]]) == length(set[[j]])-1){
sect <- intersect(set[[j]], set[[i]]) # Nodes shared between vertices
diff <- setdiff(set[[j]], set[[i]]) # Nodes which differ between vertices. Note, order matters for this function
# Check if only one node has CHANGED between vertices
if(length(sect) == length(set[[j]])-1){
edge = data.frame(from = names(set[i]), to = names(set[j]), change = diff) # Make a new row for output data frame
edge.list <- rbind(edge.list, edge) # Write to output frame
}
}
}
}
return(edge.list)
}
plot.rel <- function(line, pheno, inet, vertices, label = TRUE, v.size = 15, e.label.size = edge_label_size){
# Function to plot relative graphs
#
# Args:
# line = cell line to plot
# pheno = phenotype to plot, either apop or prolif
# inet = list of igraph net objects
# vertices = list of vertices for above objects
# label = Plot edge labels or not
# v.size = vertex size
# e.label.size = edge label size
#
# Returns a plot
data <- inet[[line]]
if(pheno == "apop"){
plot(data
, vertex.color = V(data)$color.apop.rel
#, layout = layout_as_tree(net$`SKBR3`, root = "EGF__1__Wnt1__1_AT_0")
, layout = layout.sugiyama(data, hgap = 2, vgap = 10)$layout
, vertex.label = NA
, vertex.size = v.size
, edge.label = if (label == TRUE) {E(data)$change} else {NA}
, edge.label.family = "sans"
, edge.width = edge_width
, edge.arrow.width = arrow_width
, edge.arrow.size = arrow_size
, edge.label.color = "black"
, edge.label.cex = e.label.size
, edge.label.dist = label_dist
, edge.color = E(data)$colour
#, main = paste0("Apoptosis in ", line)
)
lgnd = legend("topleft"
, title = "Mean\n Apoptosis"
, bty = "n"
, pch=21
, cex = legend_cex
, pt.cex = legend_point_size
, pt.bg = unique(as.vector(arrange(vertices[[line]], mean_Apoptosis))$color.apop.rel) #Colours to fill legend with
, legend=unique(as.vector(arrange(vertices[[line]], mean_Apoptosis))$mean_Apoptosis)
)
params = lgnd$rect
rect(xleft = params[['left']],
ybottom = params[['top']] - params[['h']],
xright = params[['left']] + params[['w']],
ytop = params[['top']] + 0.15)
legend(
"topright"
, title = "Mutation"
##, bty = "n"
, pch = "-"
, pt.cex = legend_line_size
, cex = legend_cex
, pt.bg = unique(E(data)$colour) #add colour to non-line symbol in legend
, col = unique(E(data)$colour) #add colour to line symbol in legend
, legend = gsub("[\n]", "", unique(E(data)$change))
)
}
if(pheno == "prolif"){
plot(data
, vertex.color = V(data)$color.prolif.rel
#, layout = layout_as_tree(net$`SKBR3`, root = "EGF__1__Wnt1__1_AT_0")
, layout = layout.sugiyama(data, hgap = 2, vgap = 10)$layout
, vertex.label = NA
, vertex.size = v.size
, edge.label = if (label == TRUE) {E(data)$change} else {NA}
, edge.label.family = "sans"
, edge.width = edge_width
, edge.arrow.width = arrow_width
, edge.arrow.size = arrow_size
, edge.label.color = "black"
, edge.label.cex = e.label.size
, edge.label.dist = label_dist
, edge.color = E(data)$colour
#, main = paste0("Proliferation in ", line)
)
lgnd = legend("topleft"
, title = "Mean\n Proliferation"
, bty = "n"
, pch=21
, cex = legend_cex
, pt.cex = legend_point_size
, pt.bg = unique(as.vector(arrange(vertices[[line]], mean_Proliferation))$color.prolif.rel) #Colorus to fill legend with
, legend=unique(as.vector(arrange(vertices[[line]], mean_Proliferation))$mean_Proliferation)
)
params = lgnd$rect
rect(xleft = params[['left']],
ybottom = params[['top']] - params[['h']],
xright = params[['left']] + params[['w']],
ytop = params[['top']] + 0.15)
legend(
"topright"
, title = "Mutation"
#, bty = "n"
, pch = "-"
, col = unique(E(data)$colour) #add colour to line symbol in legend
, pt.cex = legend_line_size
, cex = legend_cex
, pt.bg = unique(E(data)$colour)
, legend = gsub("[\n]", "", unique(E(data)$change))
)
}
}
plot.abs <- function(line, pheno, inet, vertices, label = TRUE, v.size = 15, e.label.size = edge_label_size){
# Function to plot absolute graphs
#
# Args:
# line = cell line to plot
# pheno = phenotype to plot, either apop or prolif
# inet = list of igraph net objects
# vertices = list of vertices for above objects
# label = Plot edge labels or not
# v.size = vertex size
# e.label.size = edge label size
#
# Returns a plot
data <- inet[[line]]
if(pheno == "apop"){
plot(data
, vertex.color = V(data)$color.apop.absolute
, layout = layout.sugiyama(data, hgap = 2, vgap = 10)$layout
, vertex.label = NA
, vertex.size = v.size
, edge.label = if (label == TRUE) {E(data)$change} else {NA}
, edge.label.family = "sans"
, edge.width = edge_width
, edge.arrow.width = arrow_width
, edge.arrow.size = arrow_size
, edge.label.color = "black"
, edge.label.cex = e.label.size
, edge.label.dist = label_dist
, edge.color = E(data)$colour
)
lgnd = legend("topleft"
, title = "Mean\n Apoptosis"
, bty = "n"
, pch=21
, cex = legend_cex
, pt.cex = legend_point_size
, pt.bg = unique(as.vector(arrange(vertices[[line]], mean_Apoptosis))$color.apop.absolute) #Colors to fill legend with
, legend=unique(as.vector(arrange(vertices[[line]], mean_Apoptosis))$mean_Apoptosis)
)
params = lgnd$rect
rect(xleft = params[['left']],
ybottom = params[['top']] - params[['h']],
xright = params[['left']] + params[['w']],
ytop = params[['top']] + 0.15)
legend(
"topright"
, title = "Mutation"
, pch = "-"
, col = unique(E(data)$colour) #add colour to line symbol in legend
, pt.cex = legend_line_size
, cex = legend_cex
, pt.bg = unique(E(data)$colour)
, legend = gsub("[\n]", "", unique(E(data)$change))
)
}
if(pheno == "prolif"){
plot(data
, vertex.color = V(data)$color.prolif.absolute
, layout = layout.sugiyama(data, hgap = 2, vgap = 10)$layout
, vertex.label = NA
, vertex.size = v.size
, edge.label = if (label == TRUE) {E(data)$change} else {NA}
, edge.label.family = "sans"
, edge.width = edge_width
, edge.arrow.width = arrow_width
, edge.arrow.size = arrow_size
, edge.label.color = "black"
, edge.label.cex = e.label.size
, edge.label.dist = label_dist
, edge.color = E(data)$colour
)
lgnd = legend("topleft"
, title = "Mean\n Proliferation"
, pch=21
, bty = "n"
, cex = legend_cex
, pt.cex = legend_point_size
, pt.bg = unique(as.vector(arrange(vertices[[line]], mean_Proliferation))$color.prolif.absolute) #Colors to fill legend with
, legend=unique(as.vector(arrange(vertices[[line]], mean_Proliferation))$mean_Proliferation)
)
params = lgnd$rect
rect(xleft = params[['left']],
ybottom = params[['top']] - params[['h']],
xright = params[['left']] + params[['w']],
ytop = params[['top']] + 0.15)
legend(
"topright"
, title = "Mutation"
, pch = "-"
, col = unique(E(data)$colour) #add colour to line symbol in legend
, pt.cex = legend_line_size
, cex = legend_cex
, pt.bg = unique(E(data)$colour)
, legend = gsub("[\n]", "", unique(E(data)$change))
)
}
}
scale.for.color <- function(df, col, round.to = 2){
# Scale values to integers so can be used to colour vertices or edges
#
# Args:
# df: dataframe to be acted on
# col: column to scale
# round.to: need to round decimals off, as cannot multiply up a recurring number. This is the number of decimal places to round to
# Return:
# data frame with one column rounded to `round.to` decimal places and then multiplied up to be an integer. Also add 1 as this is needed to use as index for a vector of colours
new.col <- paste0("scaled_", col)
df[[new.col]] <- (10**round.to)*round(df[[col]], digits = round.to)+1
return(df)
}
visualise <- function(expanded.summary.by.mutation.attractor, cell.line.names) {
verts <- lapply(expanded.summary.by.mutation.attractor, make.verts.table) #make vert table for all cell lines
edges <- lapply(verts, make.edge.list) # Make edge list for all cell lines
edges <- lapply(edges, function(x) {x$colour <- Edge.col(x, pal = edge.pal); return(x)})
# Replace "@" symbol
edges <- lapply(edges, function(x) {x$change <- gsub("@2", "\n Act.", x$change); return(x)})
edges <- lapply(edges, function(x) {x$change <- gsub("@0", "\n Inact.", x$change); return(x)})
verts <- lapply(verts, scale.for.color, col = "mean_Proliferation", round.to = 2) #Scale list of verts for colour for proliferation
verts <- lapply(verts, scale.for.color, col = "mean_Apoptosis", round.to = 2) #Scale list of verts for colour for apoptosis
verts.max.prolif <- lapply(verts, function(x) {max(x[["scaled_mean_Proliferation"]])}) #Find maximum value of scaled colours for each df of vertices
verts.max.apop <- lapply(verts, function(x) {max(x[["scaled_mean_Apoptosis"]])}) #Find maximum value of scaled colours for each df of vertices
pal.prolif.rel <- lapply(verts.max.prolif, function(x) {pal <- colorRampPalette(c("white", blue.pal))(n = x); return(pal)}) #Makes a colour palette where red is the highest value of apoptosis for THAT cell line
pal.prolif.absolute <- colorRampPalette(c("white", blue.pal))(n = max(unlist(verts.max.prolif))) #Makes a colour palette where red is the highest value of apoptosis for ANY cell line
# Make Apoptosis colour palettes
pal.apop.rel <- lapply(verts.max.apop, function(x) {pal <- colorRampPalette(c("white", red.pal))(n = x); return(pal)}) #Makes a colour palette where red is the highest value of apoptosis for THAT cell line
pal.apop.absolute <- colorRampPalette(c("white", red.pal))(n = max(unlist(verts.max.apop))) #Makes a colour palette where red is the highest value of apoptosis for ANY cell line
verts <- mapply(function(x,y) {x$color.prolif.rel <- y[x$scaled_mean_Proliferation]; return(x)}, x = verts, y = pal.prolif.rel, SIMPLIFY = FALSE) #Must apply BEFORE conversion to igraph object, as cannot loop over it easily due to needing to access nodes with function V(net)
verts <- lapply(verts, function(x) {x$color.prolif.absolute <- pal.prolif.absolute[x$scaled_mean_Proliferation]; return(x)}) #Must apply BEFORE conversion to igraph object, as cannot loop over it easily due to needing to access nodes with function V(net)
verts <- mapply(function(x,y) {x$color.apop.rel <- y[x$scaled_mean_Apoptosis]; return(x)}, x = verts, y = pal.apop.rel, SIMPLIFY = FALSE) #Must apply BEFORE conversion to igraph object, as cannot loop over it easily due to needing to access nodes with function V(net)
verts <- lapply(verts, function(x) {x$color.apop.absolute <- pal.apop.absolute[x$scaled_mean_Apoptosis]; return(x)}) #Must apply BEFORE conversion to igraph object, as cannot loop over it easily due to needing to access nodes with function V(net)
# Make a list of igraph network objects for each cell line
net <- mapply(graph_from_data_frame, d = edges, vertices = verts, list(directed = TRUE), SIMPLIFY = FALSE)
cell.line.names.proper <- cell.line.names [! cell.line.names %in% "Background"] # Remove background as not a network to plot, only one mutation
heightf = 7 # Factor to multiply resolution by for height
widthf = 11 # Factor to multiply resolution by for height
lapply(cell.line.names.proper, function(x) {to.dev(plot.rel(x, "apop", net, verts), png, file.path(paste0(x, "_apop_rel.png")), width = 300*widthf, height = 300*heightf, res = 300, pointsize = 8)})
lapply(cell.line.names.proper, function(x) {to.dev(plot.rel(x, "prolif", net, verts), png, file.path(paste0(x, "_prolif_rel.png")), width = 300*widthf, height = 300*heightf, res = 300, pointsize = 8)})
lapply(cell.line.names.proper, function(x) {to.dev(plot.abs(x, "apop", net, verts), png, file.path(paste0(x, "_apop_abs.png")), width = 300*widthf, height = 300*heightf, res = 300, pointsize = 8)})
lapply(cell.line.names.proper, function(x) {to.dev(plot.abs(x, "prolif", net, verts), png, file.path(paste0(x, "_prolif_abs.png")), width = 300*widthf, height = 300*heightf, res = 300, pointsize = 8)})
# Write out pdf plots
lapply(cell.line.names.proper, function(x) {to.dev(plot.rel(x, "apop", net, verts), pdf, file.path(paste0(x, "_apop_rel.pdf")), width = widthf, height = heightf, pointsize = 8)})
lapply(cell.line.names.proper, function(x) {to.dev(plot.rel(x, "prolif", net, verts), pdf, file.path(paste0(x, "_prolif_rel.pdf")), width = widthf, height = heightf, pointsize = 8)})
lapply(cell.line.names.proper, function(x) {to.dev(plot.abs(x, "apop", net, verts), pdf, file.path(paste0(x, "_apop_abs.pdf")), width = widthf, height = heightf, pointsize = 8)})
lapply(cell.line.names.proper, function(x) {to.dev(plot.abs(x, "prolif", net, verts), pdf, file.path(paste0(x, "_prolif_abs.pdf")), width = widthf, height = heightf, pointsize = 8)})
}