ptm.Rmd

---
title: "The Growing Landscape of Protein Modifications"
author: "E. Keith Keenan & Matthew D. Hirschey"
date: "`r format(Sys.time(), '%B %d, %Y')`"
output:
  pdf_document:
    toc: yes
  html_notebook:
    df_print: paged
    toc: yes
---
This is an [R Markdown](http://rmarkdown.rstudio.com) notebook accompanying a review on protein modifications. When you execute code within the notebook, the results appear beneath the code and Figures will be save to the working directory. 

##Load libraries
```{r load_block, warning=FALSE, echo=TRUE}
library(tidyverse)
library(janitor)
library(viridis)
library(XML)
library(feather)
library(rmarkdown)
library(beepr) #long analysis; get some coffee, and comeback when ready
library(patchwork)

#clear environment
#rm(list=ls()) 

#print Session information for provenance and reproducibility
utils:::print.sessionInfo(sessionInfo()[-8]) 
#You can remove an item from sessionInfo(), which is a list with a class attribute, by printing the resulting object omitting one of the list items (omitted list of packages installed, but not loaded)

#Set theme
theme_set(theme_light())
```

##Figure 3a
Overall goal is to quanitfy known landscape of protein amino acids. Chose to get data from Uniprot, as a comprehensive and validated resouce containing data for human proteins. 
```{r fig1}
ptm_raw <- read_tsv("https://www.uniprot.org/docs/ptmlist.txt", col_names = FALSE, skip = 48) 
#skip 48 first lines which contain data file dictionary

#URL points to a datafile, to increase reproducibility; datafile is also downloaded 12/21/2018 and saved in working data directory in case URL link breaks
#alt
#ptm_raw <- read_tsv("data/ptmlist.txt", col_names = FALSE, skip = 48)

#make working df
ptm <- ptm_raw %>% 
  separate(X1, c("key", "value"), sep = 3) %>% 
  mutate(id = if_else(grepl("ID", key), value, NA_character_)) %>% #must call NA_char so that fill fxn works
  fill(id) #need fill fxn to populate ids across all observations, so that spread can work

#clean more
ptm$key <- str_trim(ptm$key, side = "right") #use stringr pkg to remove white space *janitor works on colnames only

#drop rows, duplicate rows are causing problems with spread, and don't need them
ptm <- ptm %>% 
  filter(!key %in% c("//", "TR", "DR", "---"))
  
#This is code I used to ensure that there were no duplicates
#ptm <- ptm %>%
#  unite(key_id, c("key", "id"), sep = "_", remove = FALSE)
#ptm_dup <- get_dupes(ptm, key_id)
#I check the ptm_dup df and made to sure to drop the keys that had more than one entry (immediate preceeding code chunk)

#spread data
ptm <- ptm %>% 
  spread(key, value) #not clever names, but appropriate

#gives a tibble of 645 observations, therefore 645 unique PTMs

#double check to see no duplicates
#get_dupes(ptm, id)

#more cleaning steps
ptm$MM <- as.numeric(ptm$MM)
ptm$MA <- as.numeric(ptm$MA)
ptm$KW <- str_replace(ptm$KW, "\\.", "") #need two \\ to mean literal "."
ptm$KW <- as.factor(ptm$KW)
ptm$FT <- str_trim(ptm$FT, side = "left") #use stringr pkg to remove white space
ptm$TG <- str_trim(ptm$TG, side = "left")
ptm$TG <- str_replace(ptm$TG, "\\.", "") #need two \\ to mean literal "."
ptm$KW <- fct_explicit_na(ptm$KW, na_level = "Other") #get rid of NAs in KW by making a factor
ptm <- ptm %>% select(-Cop, -Dis) #remove copyright and distribution columns
ptm$KW <- str_trim(ptm$KW, side = "both")

#a little bit of eda
count(ptm, FT, sort = TRUE)
#should I include crosslinks? Or just modifications?

ptm %>%
  filter(!FT == "CROSSLNK") %>% #omit AA cross links only
  summarize(n = n())
#This code snippet give me the total number of unique modifications, with CROSSLINK removed; total is 498. But I know the list doens't include 3 recently published modifications discovered by our group, plus a few others by YMZ (considering this a lag in DB updating), so OK to conclude over 500.

count(ptm, KW, sort = TRUE) #number of modifications by keyword
#a lot of glycoproteins!

#ptm %>%
#  filter(FT == "MOD_RES") %>% #include only modified AAs, no cross links, no lipids, no glycoproteins?
#  count(TG, sort = TRUE) %>% #target (TG) is exactly what I need
#  mutate(TG = fct_reorder(TG, n)) %>% 
#  ggplot(aes(TG, n)) +
#  geom_col() +
#  coord_flip() +
#  labs(x = "") + 
#  expand_limits(y = 40)
#commented this out because it only includes modified AAs; not sure if this is useful

ptm %>%
  filter(!FT == "CROSSLNK") %>% #omit AA cross links only
  count(TG, sort = TRUE)

kw_levels <-
  ptm %>%
  filter(!FT == "CROSSLNK") %>% #omit AA cross links only
  count(KW, sort = TRUE) %>% 
  slice(1:10) %>% 
  filter(!is.na(KW)) %>% 
  pull(KW)

#ptm$KW <- factor(ptm$KW, levels = kw_levels)

fig3a <- 
  ptm %>%
  filter(!FT == "CROSSLNK") %>% #omit AA cross links only
  ggplot() +
  geom_bar(aes(fct_rev(fct_infreq(TG, ordered = TRUE)), fill = fct_infreq(KW) %>% fct_lump_n(10)), color = "black") +
  coord_flip() +
  labs(x = "", y = "Count") +
  expand_limits(y = 80) +
  scale_fill_viridis(discrete = TRUE, 
                     direction = -1, 
                     option = "viridis", 
                     name = "Most Frequent \nModification Class") +
  NULL

#save plot
ggsave("output/fig3a.pdf", plot = last_plot(), dpi = 600)

#save ptm
#save(ptm, file=here::here("data", "ptm.RData"))

```

```{r}
#####################################################
#pull
ptm_vec <- ptm %>%
  filter(!is.na(id)) %>% 
  filter(!FT == "CROSSLNK") %>% 
  select(id, AC) %>% 
  filter(str_detect(id, "Blocked amino end", negate = TRUE)) %>% 
  filter(str_detect(id, "\\.\\.\\.", negate = TRUE)) %>% 
  #tidyr::separate(id, into = c("id", "tmp"), sep = "\\([:alpha:]{3}\\)$") %>% 
  distinct(id) %>% 
  pull(id)

ptm_vec <-  str_trim(ptm_vec, side = "both")

save(ptm_vec, file=here::here("data", "ptm_vec.RData"))

 ##################################################### 
```

```{r}
#Run pir.Rmd first to generate pir_sys
ptm_dr <- ptm_raw %>% 
  separate(X1, c("key", "value"), sep = 3) %>% 
  mutate(id = if_else(grepl("ID", key), value, NA_character_)) %>% #must call NA_char so that fill fxn works
  fill(id) 

#clean more
ptm_dr$key <- str_trim(ptm_dr$key, side = "right") #use stringr pkg to remove white space *janitor works on colnames only

ptm_dr <- ptm_dr %>% 
  filter(key == "ID" | str_detect(value, "RESID") | key == "AC") %>% 
  spread(key, value) %>% 
  select(ID, AC, DR) %>% 
  rename(DR_ID = ID)

ptm_dr$DR <- str_trim(ptm_dr$DR, side = "left")
ptm_dr$DR <- str_remove(ptm_dr$DR, "RESID\\;\\s")
ptm_dr$DR <- str_remove(ptm_dr$DR, "\\.")

ptm_dr$AC <- str_trim(ptm_dr$AC, side = "left")
ptm$AC <- str_trim(ptm$AC, side = "left")

ptm <- ptm %>% 
  left_join(ptm_dr, by = "AC")

ptm <- ptm %>% 
  left_join(pir_sys, by = c("DR" = "id"))

save(ptm, file=here::here("data", "ptm.RData"))

```

##Figure 3b
Goal is to determine how these modifications are distributed; thought it'd be interesting to visualize by average added mass (MA) to a protein, with several small changes in molecular mass, with some very large additions of mass
```{r fig2}
fig3b <- ptm %>%
  filter(!FT == "CROSSLNK") %>% #omit AA cross links only
  ggplot() +
  #geom_point(aes(x = MA, y =0, color = fct_lump(KW, 0)), shape = "|", size = 15, alpha = 1/2) +
  geom_density(aes(x = MA, ..scaled.., color = fct_lump(KW,0))) +
  geom_rug(aes(x = MA, y = 0, color = fct_lump(KW,0)), sides = "b", alpha = 1/2, position = "jitter", size = 1) + 
  labs(x = "Mass Appended (Da)", y = "Distribtution of Modifications (scaled)") +
  scale_color_viridis(discrete = TRUE, direction = 1) +
  scale_y_continuous(limits = c(0,1)) + 
  theme(legend.position = "") +
  NULL

#save plot
ggsave("output/fig3b.pdf", plot = last_plot(), width = 5, height = 5, dpi = 600)

#the reason some average masses (MA) are so abundant is because you find the same modifications across several AAs, thereby weighting some more
#NB several glycans and lipids are variable masses, and therefore are entered as NA, so not reflected in the graph

ptm %>%
  filter(!FT == "CROSSLNK") %>% #omit AA cross links only
  count(KW, sort = TRUE)

#combine figures for editor
fig3ab <- 
  fig3a + fig3b +
  plot_annotation(tag_levels = 'A')
ggsave("output/fig3ab.pdf", plot = fig3ab, width = 8, height = 4, dpi = 600)

```
##AA Analyses
###Lysine Analysis
In this code chunk, the goal is to count and summarize lysine modifications.
```{r lysine}
ptm %>% 
  filter(!FT == "CROSSLNK") %>% #omit AA cross links only
  filter(TG == "Lysine") %>% 
  count(ID, sort = TRUE)

#code chunk to make a tibble that is easy to view all attributes; no need to save as an object in environment
ptm %>% 
  filter(!FT == "CROSSLNK") %>% #omit AA cross links only
  filter(TG == "Lysine") %>% 
  arrange(MA) #sorts by mass, low to high
```

###Cysteine Analysis
In this code chunk, the goal is to count and summarize cysteine modifications. Counted 57 (as of Feb 2019), however does not include 3 published modifiations: succination, 2,3-dicarboxylpropylation (i.e. itaconylation), or s-acetylation, so OK to conclude 60, at least.
```{r cysteine}
ptm %>% 
  filter(!FT == "CROSSLNK") %>% #omit AA cross links only
  filter(TG == "Cysteine") %>% 
  count(ID, sort = TRUE)

#code chunk to make a tibble that is easy to view all attributes; no need to save as an object in environment
ptm %>% 
  filter(!FT == "CROSSLNK") %>% #omit AA cross links only
  filter(TG == "Cysteine") %>% 
  arrange(MA) #sorts by mass, low to high
```
###Serine Analysis
In this code chunk, the goal is to count and summarize serine modifications. Counted 70 (as of Feb 2019). 
```{r serine}
ptm %>% 
  filter(!FT == "CROSSLNK") %>% #omit AA cross links only
  filter(TG == "Serine") %>% 
  count(ID, sort = TRUE)

ptm %>% 
  filter(!FT == "CROSSLNK") %>% #omit AA cross links only
  filter(TG == "Threonine") %>% 
  count(ID, sort = TRUE)

ptm %>% 
  filter(!FT == "CROSSLNK") %>% #omit AA cross links only
  filter(TG == "Tyrosine") %>% 
  count(ID, sort = TRUE)

ptm %>% 
  filter(!FT == "CROSSLNK") %>% #omit AA cross links only
  filter(TG == "Serine") %>% 
  filter(str_detect(CF, "P")) %>% 
  count(ID, sort = TRUE)
#13 serine modifications contain phosphate (12 carbon-phosphate, 1 phosphate only)

#code chunk to make a tibble that is easy to view all attributes; no need to save as an object in environment
ptm %>% 
  filter(!FT == "CROSSLNK") %>% #omit AA cross links only
  filter(TG == "Serine") %>% 
  arrange(MA) #sorts by mass, low to high
```

###Phenylalanine Analysis
In this code chunk, the goal is to count and summarize phenylalanine modifications. Counted 5 (as of Feb 2019); 1?
```{r phenylalanine}
ptm %>% 
  filter(!FT == "CROSSLNK") %>% #omit AA cross links only
  filter(TG == "Phenylalanine") %>% 
  arrange(MA) #sorts by mass, low to high
```

###Protein Backbone Analysis
In this code chunk, the goal is to count and summarize backbone modifications. First look at backbone alone; next look at the part of the protein where these are ascribed; then look at distribution of all backbone modifications on amino acids (glycine is the most); but, these are all n- or c-term modifications; if you look at protien core modifications, these are all serine/threonine/tyrosine and cysteine.
```{r backbone}
ptm %>% 
  filter(!FT == "CROSSLNK") %>% #omit AA cross links only
  filter(str_detect(PA, "backbone")) %>% 
  arrange(MA) #sorts by mass, low to high

ptm %>% 
  filter(!FT == "CROSSLNK") %>% #omit AA cross links only
  filter(str_detect(PA, "backbone")) %>% 
  count(PA, sort = TRUE)

ptm %>% 
  filter(!FT == "CROSSLNK") %>% #omit AA cross links only
  filter(str_detect(PA, "backbone")) %>% 
  count(PP, sort = TRUE)

ptm %>% 
  filter(!FT == "CROSSLNK") %>% #omit AA cross links only
  filter(str_detect(PA, "backbone")) %>% 
  count(TG, sort = TRUE)

ptm %>% 
  filter(!FT == "CROSSLNK") %>% #omit AA cross links only
  filter(str_detect(PA, "backbone")) %>% 
  filter(TG == "Glycine")

ptm %>% 
  filter(!FT == "CROSSLNK") %>% #omit AA cross links only
  filter(str_detect(PA, "backbone")) %>% 
  filter(str_detect(PP, "core")) %>% 
  count(TG, sort = TRUE)

ptm %>% 
  filter(!FT == "CROSSLNK") %>% #omit AA cross links only
  filter(str_detect(PA, "backbone")) %>% 
  filter(str_detect(PP, "core")) %>% 
  arrange(TG)
```

##Figure 4
In this figure, the goal is to determine how many acyl-CoA species have been measured
Import metabolite data from HMDB (http://www.hmdb.ca/downloads); all metabolites http://www.hmdb.ca/system/downloads/current/hmdb_metabolites.zip
```{r import_fig4, eval=FALSE, include=FALSE}
# Parse the XML files
proteins_raw <- xmlToDataFrame("data/hmdb_proteins.xml")
metabolites_raw <- xmlToDataFrame("data/hmdb_metabolites.xml")
#use feather next time here
save (temp) because extracting XML took a long time for a 4GB XML file!
save(proteins_raw, file = "data/proteins_raw.Rda")
save(metabolites_raw, file = "data/metabolites_raw.Rda")

#Commented out parsing and saving code, but then set this code chunk to eval/include = FALSE, so re-running the entire code fresh will simply skip this chunk, and load the raw data files in the next chunk as a df

```

```{r reload_fig4}
#reload
load("data/proteins_raw.Rda")
load("data/metabolites_raw.Rda")
```

```{r clean_fig4}
#as.X
proteins_raw <- as_tibble(proteins_raw)
metabolites_raw <- as_tibble(metabolites_raw)
metabolites_raw$average_molecular_weight <- as.numeric(metabolites_raw$average_molecular_weight)

#clean
metabolites <- metabolites_raw %>%
  select(one_of("accession", "name", "average_molecular_weight", "chemical_formula", "smiles", "normal_concentrations")) %>% 
  clean_names() %>% 
  remove_empty("rows") 
```

```{r fig4}
#count CoAs
CoA <- metabolites %>% 
  filter(str_detect(name, "CoA")) %>% 
  mutate(average_molecular_weight_noCoA = round(average_molecular_weight - 767.534, 2)) %>%  #substract weight of CoA for appended mass
  arrange(average_molecular_weight_noCoA)

CoA <- CoA %>% #number of carbons
  mutate(carbon_num = str_extract(chemical_formula, "C\\d+")) %>% #extract C then digit, then one or more
  mutate(carbon_num = str_extract(carbon_num, "\\d+")) %>% #to extract digit only
  mutate(carbon_num = as.numeric(carbon_num)) %>% #numeric, to do substraction next
  mutate(carbon_num_acyl = carbon_num - 21) %>% #remove number of carbons in CoA alone, to get acyls
  slice(-1:-6) %>%  #typos in the dataset
  arrange(carbon_num_acyl) %>% 
  mutate (type = "CoA")

CoA <- CoA %>% #number of oxygens
  mutate(o2_num = str_extract(chemical_formula, "O\\d+")) %>% #extract C then digit, then one or more
  mutate(o2_num = str_extract(o2_num, "\\d+")) %>% #to extract digit only
  mutate(o2_num = as.numeric(o2_num)) %>% #numeric, to do substraction next
  mutate(o2_num_acyl = o2_num - 16) %>% #remove number of oxygens in CoA alone, to get acyls
  slice(-1) #remove dephosphoCoA

CoA <- CoA %>%
  separate(smiles, into = c("smiles1", "smiles2"), sep = "S", remove = FALSE, extra = "merge") #split smiles code on the Sulfur(S); needed to split this from the next code chunk, so it runs first; if not, then throws an error that 'smiles1' does not exist (length is zero)
#https://en.wikipedia.org/wiki/Simplified_molecular-input_line-entry_system

CoA <- CoA %>%
  mutate(smiles_acyl = if_else(str_detect(smiles1, "P"), smiles2, smiles1)) #this is the code that pulls the 'acyl' from the split smiles data, and discards the CoA, becuase it contains the phosphate; this will allow me to look only at the possible protein appendage.

CoA <- CoA %>%
  mutate(acyl_description = if_else(str_detect(smiles_acyl, "\\(O\\)\\=O"), "Carboxyl", 
                                    if_else(str_detect(smiles_acyl, "CO"), "Hydroxyl", 
                                    if_else(str_detect(smiles_acyl, "C\\(O\\)C"), "Hydroxyl",
                                    if_else(str_detect(smiles_acyl, "C\\=C"), "Methylene",   
                                    if_else(str_detect(smiles_acyl, "C\\(\\=C\\)"), "Methylene",  
                                    if_else(str_detect(smiles_acyl, "CC\\=O"), "Aldehyde", #hardcode aldehyde
                                    if_else(str_detect(smiles_acyl, "C\\=O"), "Straight", #hardcode formyl        
                                    if_else(str_detect(smiles_acyl, "C\\(C\\)"), "Branched",        
                                    if_else(str_detect(smiles_acyl, "CCC"), "Straight", 
                                    if_else(str_detect(smiles_acyl, "CC\\(\\=O\\)"), "Straight", #hardcode acetyl         
                                        "Other")))))))))))
         
CoA %>%
  filter(str_detect(smiles_acyl, "N")) %>% #looking
  count(smiles_acyl, sort = TRUE)

CoA %>%
  count(average_molecular_weight, sort = TRUE) #code chunk to count acyl-CoAs, both total and discrete Molecular Weights

ggplot(CoA) +
  geom_bar(aes(x = carbon_num_acyl, fill = acyl_description), color = "black", width = 0.8) +
  labs(x = "Acyl-CoA Chain Length (# of carbons)", y = "Count") +
  expand_limits(y = 40) +
  scale_fill_viridis(discrete = TRUE, direction = -1, option = "magma", name = "Acyl Class") +
  scale_x_continuous(breaks = c(0,2,4,6,8,10,12,14,16,18,20,22,24,26,28), limits = c(0,28)) + 
  NULL

#save plot
ggsave("output/fig4.pdf", plot = last_plot(), width = 5, height = 5, dpi = 600)

CoA %>% 
  count(average_molecular_weight_noCoA, sort = TRUE)

```

##Acyl-phosphate Analysis
Code chunk to count acyl-phosphates. 10 total counted, although strangely two are listed at 266 Da. Same or different?
```{r acyl_phosphates}
phosphate <- metabolites %>% 
  filter(str_detect(smiles, "C\\(=O\\)OP")) %>% #regex the smiles code for carbonyl-phosphate bond
  arrange(average_molecular_weight) %>% 
  mutate (type = "Acyl Phosphate") %>% #duplicate entry!
  mutate(pre_post = if_else(str_detect(smiles, "C\\(=O\\)OP"), "pre", "post")) %>% 
  separate(smiles, into = c("pre_smiles", "post_smiles"), sep = "P", remove = FALSE, extra = "merge") %>%  #merge is to prevent discarding data
  mutate(added_carbons = if_else(grepl("pre", pre_post), str_count(pre_smiles, "C"), str_count(post_smiles, "C")))
phosphate

phosphate %>%
  count(average_molecular_weight, sort = TRUE) #code chunk to count acyl-CoAs, both total and discrete Molecular Weights

```

##Figure 5
In this figure, the goal is to determine how many reactive [human] metabolites there are and to determine how many are associated with PTMs
```{r fig5a}
#count thioesters
thioester <- metabolites %>% 
  filter(str_detect(smiles, "C\\(=O\\)S") | str_detect(smiles, "SC\\(=O\\)")) %>% #regex the smiles code for thioesters at either orientation
  arrange(average_molecular_weight) %>% 
  mutate(type = "Thioester") %>% 
  mutate(pre_post = if_else(str_detect(smiles, "C\\(=O\\)S"), "pre", "post")) %>% 
  separate(smiles, into = c("pre_smiles", "post_smiles"), sep = "S", remove = FALSE, extra = "merge" ) %>%  #merge is to prevent discarding data "Expected 2 pieces. Additional pieces discarded in 26 rows"
  mutate(added_carbons = if_else(grepl("pre", pre_post), str_count(pre_smiles, "C"), str_count(post_smiles, "C"))) #entry errors
  
#because the smiles code has thioesters with orientations that could add carbon on either sides of the code, the first mutate adds a pre_post column to indicate which side will be added; then the separate will split at the thiol; then the last mutate will count the number of carbons in either the pre or post column, depending.

#these include all from CoA list, except "CoA-"
#anti_join(CoA, thioester, by = "name")
#semi_join(CoA, thioester, by = "name") leaves 355, which is one less than in the CoA df

#sum(str_count(thioester$smiles, "C\\(=O\\)S")) #346
#sum(str_count(thioester$smiles, "SC\\(=O\\)")) #80

#thioester$added_carbons <- as.factor(thioester$added_carbons)

match2 <- ptm %>% 
  filter(!FT == "CROSSLNK") %>% #omit AA cross links only
  filter(TG == "Lysine") %>% 
  select(MA) %>% 
  round(2) %>% 
  distinct() %>% 
  pull()

match1 <- CoA %>% 
  select(average_molecular_weight_noCoA) %>% 
  round(2) %>% 
  distinct %>% 
  mutate(mod =  if_else(average_molecular_weight_noCoA %in% match2, TRUE, FALSE)) %>% 
  left_join(CoA, by = "average_molecular_weight_noCoA")

match1 %>% count(mod, sort = TRUE)

fig5a <-
  ggplot(match1) +
  geom_histogram(aes(x = average_molecular_weight_noCoA, fill = mod), color = "black", binwidth = 25, center = 0, position = "stack", alpha = 0.9) +
  labs(x = "Molecular Weight (Da)", y = "Possible modifications (#)") +
  scale_fill_viridis(discrete = TRUE, direction = 1, option = "cividis", name = "Acyl-CoA Species", labels = c("Not Matching Lysine Modification", "Matching Lysine Modification")) +
  NULL

#save plot
ggsave("output/fig5a.pdf", plot = last_plot(), width = 7, height = 5, dpi = 600)
```

```{r fig5b}
#count aldehydes
aldehyde <- metabolites %>%
  filter(str_detect(name, "aldehyde")) %>% #str_detect for aldehydes give too many false positives
  arrange(average_molecular_weight) %>% 
  mutate(type = "Aldehyde") %>% 
  mutate(added_carbons = str_count(smiles, "C"))
  
#Merge thioesters, phosphates, aldehydes
carbon <- full_join(thioester, phosphate) %>% 
  full_join(aldehyde) %>% 
  arrange(average_molecular_weight) %>% 
  select(-c("smiles", "pre_smiles", "post_smiles", "pre_post", "normal_concentrations"))

fig5b <-
  ggplot(carbon) +
  geom_bar(aes(x = added_carbons, fill = type), color = "black", width = 0.8) +
  labs(x = "Predicted carbons appended", y = "Possible modifications (#)") +
  scale_fill_viridis(discrete = TRUE, direction = -1, option = "inferno", name = "Class") +
  NULL

#save plot
ggsave("output/fig5b.pdf", plot = last_plot(), width = 5, height = 5, dpi = 600)

#combine figures for editor
fig5ab <- fig5a / fig5b +
  plot_annotation(tag_levels = 'A')
ggsave("output/fig5ab.pdf", plot = fig5ab, width = 8, height = 5, dpi = 600)


```

##Save final files
Code chunk to save files
```{r save}
write_delim(ptm, "output/table_s1.csv", delim = ",", na = "")
write_delim(metabolites, "output/table_s2.csv", delim = ",", na = "")
write_delim(carbon, "output/table_s3.csv", delim = ",", na = "")
beep(sound = 8) #because mario is awesome
```