-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add external data from the FARMM study
- Loading branch information
1 parent
2d0241b
commit 9f9bb66
Showing
10 changed files
with
1,127 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,3 @@ | ||
^.*\.Rproj$ | ||
^\.Rproj\.user$ | ||
^data-raw$ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,3 +24,5 @@ Imports: | |
usedist, | ||
dplyr (>= 1.0.0), | ||
tibble (>= 3.0.0) | ||
Depends: | ||
R (>= 2.10) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
#' Food And Resulting Microbial Metabolites (FARMM) study data | ||
#' | ||
#' Information on samples collected in the FARMM study, and a distance matrix | ||
#' containing Bray-Curtis distances between the microbial communities in each | ||
#' sample. | ||
#' | ||
#' @references Tanes C, Bittinger K, Gao Y, Friedman ES, Nessel L, Paladhi UR, | ||
#' Chau L, Panfen E, Fischbach MA, Braun J, Xavier RJ, Clish CB, Li H, | ||
#' Bushman FD, Lewis JD, Wu GD. Role of dietary fiber in the recovery of the | ||
#' human gut microbiome and its metabolome. Cell Host Microbe. 2021 Mar | ||
#' 10;29(3):394-407.e5. doi: 10.1016/j.chom.2020.12.012. Epub 2021 Jan 12. | ||
#' PMID: 33440171; PMCID: PMC8022197. | ||
#' @source \url{https://pubmed.ncbi.nlm.nih.gov/33440171/} | ||
#' @name farmm | ||
|
||
#' @rdname farmm
#' @format \code{farmm_samples} is a data frame with 414 rows and 11 variables,
#'   with rows ordered by diet, then subject, then study day:
#' \describe{
#'   \item{sample_id}{unique identifiers for each sample, of the form
#'     \code{farmm.<subject_id>.<study_day>} with the study day zero-padded to
#'     two digits. These match the labels of the distances in
#'     \code{farmm_bc}}
#'   \item{subject_id}{unique identifiers for each subject in the study}
#'   \item{study_day}{the day of the study on which the sample was collected,
#'     stored as an integer}
#'   \item{diet}{the diet assigned to each subject. A factor with three
#'     levels: "Omnivore", "Vegan", and "EEN"}
#'   \item{antibiotics}{the antibiotics status of each subject during the
#'     study. A factor with three levels: "pre" (before antibiotic exposure),
#'     "current" (during antibiotic exposure), and "post" (after antibiotic
#'     exposure)}
#'   \item{height}{the height of each subject}
#'   \item{weight}{the weight of each subject}
#'   \item{age}{the age of each subject}
#'   \item{bacterial_16S_copies}{the number of bacterial 16S rRNA gene copies
#'     per gram of feces, giving an estimate of absolute bacterial abundance}
#'   \item{num_reads}{the number of high-quality, non-host sequencing reads
#'     per sample}
#'   \item{host_frac}{the fraction of host reads in the shotgun metagenomic
#'     DNA sequencing data, computed as host reads divided by the sum of host
#'     and non-host reads}
#' }
"farmm_samples" | ||
|
||
#' @rdname farmm
#' @format \code{farmm_bc} is a \code{dist} object containing the Bray-Curtis
#'   distances between the 414 samples listed in \code{farmm_samples}. The
#'   labels of the distance object are the \code{sample_id} values, in the
#'   same order as the rows of \code{farmm_samples}.
"farmm_bc" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
library(tidyverse) | ||
library(usedist) | ||
|
||
# Build the per-sample metadata table from the raw sample sheet.
# Rows are kept only when they are fecal samples, are flagged `Keep`, and do
# not have the non-numeric study day "PS" (removed before integer conversion).
farmm_samples <- read_tsv(
  "data-raw/farmm_samples.tsv",
  show_col_types = FALSE
) %>%
  filter(
    SampleType %in% "Feces",
    Keep,
    !(study_day %in% "PS")
  ) %>%
  mutate(
    # Later expressions see the columns created above them, so `study_day`
    # is already an integer when the new sample ID is assembled.
    study_day = as.integer(study_day),
    study_group = fct_relevel(study_group, "Omnivore", "Vegan", "EEN"),
    # Shorten the antibiotics labels, then put them in chronological order.
    current_antibiotics = fct_relevel(
      fct_recode(
        current_antibiotics,
        pre = "Pre Antibiotics",
        current = "Antibiotics Treatment",
        post = "Post Antibiotics"
      ),
      "pre", "current", "post"
    ),
    host_frac = host / (host + non_host),
    # New IDs look like "farmm.<SubjectID>.<two-digit study day>".
    new_sample_id = paste(
      "farmm", SubjectID, sprintf("%02d", study_day),
      sep = "."
    )
  )
|
||
# Lookup vector: names are the original SampleID values, elements are the
# corresponding new sample IDs.
farmm_new_sample_ids <- setNames(
  farmm_samples$new_sample_id,
  farmm_samples$SampleID
)
|
||
# Read the square distance table; its first (unnamed) column holds the sample
# IDs, which become the row names of the matrix.
farmm_bc <- read_tsv(
  "data-raw/farmm_bc.tsv",
  show_col_types = FALSE
) %>%
  rename(sample_id = `...1`) %>%
  column_to_rownames("sample_id") %>%
  as.matrix()

# Keep only the retained samples, in the order of the sample table.
farmm_bc <- farmm_bc[farmm_samples$SampleID, farmm_samples$SampleID]
|
||
# Relabel both dimensions of the matrix, translating each original SampleID
# into its new sample ID through the lookup vector.
dimnames(farmm_bc) <- list(
  farmm_new_sample_ids[rownames(farmm_bc)],
  farmm_new_sample_ids[colnames(farmm_bc)]
)
|
||
# Sort rows by diet, subject and study day, then keep only the published
# columns under their public names.
farmm_samples <- farmm_samples %>%
  arrange(study_group, SubjectID, study_day) %>%
  select(
    sample_id = new_sample_id,
    subject_id = SubjectID,
    study_day,
    diet = study_group,
    antibiotics = current_antibiotics,
    height,
    weight,
    age = Age,
    bacterial_16S_copies = copy_num_per_gram_feces,
    num_reads = non_host,
    host_frac
  )
|
||
# Convert the matrix to a `dist` object and reorder it to follow the rows of
# the final sample table.
farmm_bc <- dist_subset(as.dist(farmm_bc), farmm_samples$sample_id)
|
||
# Save both objects as package datasets; `overwrite = TRUE` replaces any
# previously saved copies so the script can be re-run.
usethis::use_data(farmm_samples, overwrite = TRUE) | ||
usethis::use_data(farmm_bc, overwrite = TRUE) |
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Binary file not shown.
Binary file not shown.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
# The bundled sample table must have the documented size, column layout and
# factor level ordering.
test_that("farmm_samples data is available", {
  expected_columns <- c(
    "sample_id", "subject_id", "study_day", "diet", "antibiotics", "height",
    "weight", "age", "bacterial_16S_copies", "num_reads", "host_frac"
  )
  expected_diets <- c("Omnivore", "Vegan", "EEN")
  expected_antibiotics <- c("pre", "current", "post")

  expect_equal(nrow(farmm_samples), 414)
  expect_equal(colnames(farmm_samples), expected_columns)
  expect_equal(levels(farmm_samples$diet), expected_diets)
  expect_equal(levels(farmm_samples$antibiotics), expected_antibiotics)
})
|
||
# The distance object must cover the same samples, in the same order, as the
# sample table.
test_that("farmm_bc distance matrix matches farmm_samples data frame", {
  bc_matrix <- as.matrix(farmm_bc)

  expect_equal(dim(bc_matrix), c(414, 414))
  expect_equal(colnames(bc_matrix), farmm_samples$sample_id)
  expect_equal(rownames(bc_matrix), farmm_samples$sample_id)
})