Skip to content

Commit

Permalink
Add external data from the FARMM study
Browse files Browse the repository at this point in the history
  • Loading branch information
kylebittinger committed Oct 13, 2021
1 parent 2d0241b commit 9f9bb66
Show file tree
Hide file tree
Showing 10 changed files with 1,127 additions and 0 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
^.*\.Rproj$
^\.Rproj\.user$
^data-raw$
2 changes: 2 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,5 @@ Imports:
usedist,
dplyr (>= 1.0.0),
tibble (>= 3.0.0)
Depends:
R (>= 2.10)
43 changes: 43 additions & 0 deletions R/farmm.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#' Food And Resulting Microbial Metabolites (FARMM) study data
#'
#' Information on the fecal samples collected in the FARMM study, and a
#' distance matrix containing Bray-Curtis distances between the microbial
#' communities in each pair of samples.
#'
#' @references Tanes C, Bittinger K, Gao Y, Friedman ES, Nessel L, Paladhi UR,
#' Chau L, Panfen E, Fischbach MA, Braun J, Xavier RJ, Clish CB, Li H,
#' Bushman FD, Lewis JD, Wu GD. Role of dietary fiber in the recovery of the
#' human gut microbiome and its metabolome. Cell Host Microbe. 2021 Mar
#' 10;29(3):394-407.e5. doi: 10.1016/j.chom.2020.12.012. Epub 2021 Jan 12.
#' PMID: 33440171; PMCID: PMC8022197.
#' @source \url{https://pubmed.ncbi.nlm.nih.gov/33440171/}
#' @name farmm

#' @rdname farmm
#' @format \code{farmm_samples} is a data frame with 414 rows and 11 variables:
#' \describe{
#' \item{sample_id}{unique identifiers for each sample, matching the labels
#' of the distances in \code{farmm_bc}. Each identifier has the form
#' \code{farmm.<subject_id>.<study_day>}, with the study day zero-padded to
#' at least two digits}
#' \item{subject_id}{unique identifiers for each subject in the study}
#' \item{study_day}{the day of the study on which the sample was collected,
#' as an integer}
#' \item{diet}{the diet assigned to each subject. A factor with three levels:
#' "Omnivore", "Vegan", and "EEN"}
#' \item{antibiotics}{the antibiotics status of each subject during the study.
#' A factor with three levels: "pre" (before antibiotic exposure),
#' "current" (during antibiotic exposure), and "post" (after antibiotic
#' exposure)}
#' \item{height}{the height of each subject}
#' \item{weight}{the weight of each subject}
#' \item{age}{the age of each subject}
#' \item{bacterial_16S_copies}{the number of bacterial 16S rRNA gene copies
#' per gram of feces, giving an estimate of absolute bacterial abundance}
#' \item{num_reads}{the number of high-quality, non-host sequencing reads per
#' sample}
#' \item{host_frac}{the fraction of host reads in the shotgun metagenomic DNA
#' sequencing data, computed as host reads / (host reads + non-host reads)}
#' }
"farmm_samples"

#' @rdname farmm
#' @format \code{farmm_bc} is a \code{dist} object containing the Bray-Curtis
#' distances between the 414 samples listed in \code{farmm_samples}. The
#' labels are the \code{sample_id} values, in the same order as the rows of
#' \code{farmm_samples}.
"farmm_bc"
53 changes: 53 additions & 0 deletions data-raw/farmm.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
library(tidyverse)
library(usedist)

# Sample metadata ----
# Keep the fecal samples flagged in the `Keep` column and drop the "PS" time
# point so that `study_day` can be stored as an integer. Then fix the order of
# the factor levels and derive the host read fraction and the public sample
# identifiers ("farmm.<subject>.<zero-padded day>").
sample_info <- "data-raw/farmm_samples.tsv" %>%
  read_tsv(show_col_types = FALSE) %>%
  filter(
    SampleType %in% "Feces",
    Keep,
    !(study_day %in% "PS")
  ) %>%
  mutate(
    study_day = as.integer(study_day),
    study_group = fct_relevel(study_group, "Omnivore", "Vegan", "EEN"),
    current_antibiotics = current_antibiotics %>%
      fct_recode(
        pre = "Pre Antibiotics",
        current = "Antibiotics Treatment",
        post = "Post Antibiotics"
      ) %>%
      fct_relevel("pre", "current", "post"),
    host_frac = host / (host + non_host),
    new_sample_id = paste(
      "farmm", SubjectID, sprintf("%02d", study_day),
      sep = "."
    )
  )

# Lookup table from the original sample IDs to the public sample IDs.
original_ids <- sample_info$SampleID
id_lookup <- setNames(sample_info$new_sample_id, original_ids)

# Bray-Curtis distances ----
# The first (unnamed) column of the TSV holds the row labels. Restrict the
# square matrix to the retained samples, then relabel both dimensions.
bc_matrix <- "data-raw/farmm_bc.tsv" %>%
  read_tsv(show_col_types = FALSE) %>%
  rename(sample_id = `...1`) %>%
  column_to_rownames("sample_id") %>%
  as.matrix()
bc_matrix <- bc_matrix[original_ids, original_ids]

colnames(bc_matrix) <- id_lookup[colnames(bc_matrix)]
rownames(bc_matrix) <- id_lookup[rownames(bc_matrix)]

# Final objects ----
# Select and rename the published columns, sorted by diet, subject, and day.
farmm_samples <- sample_info %>%
  select(
    sample_id = new_sample_id,
    subject_id = SubjectID,
    study_day,
    diet = study_group,
    antibiotics = current_antibiotics,
    height,
    weight,
    age = Age,
    bacterial_16S_copies = copy_num_per_gram_feces,
    num_reads = non_host,
    host_frac
  ) %>%
  arrange(diet, subject_id, study_day)

# Reorder the distances so the labels follow the rows of `farmm_samples`.
farmm_bc <- bc_matrix %>%
  as.dist() %>%
  dist_subset(farmm_samples$sample_id)

usethis::use_data(farmm_samples, overwrite = TRUE)
usethis::use_data(farmm_bc, overwrite = TRUE)
479 changes: 479 additions & 0 deletions data-raw/farmm_bc.tsv

Large diffs are not rendered by default.

479 changes: 479 additions & 0 deletions data-raw/farmm_samples.tsv

Large diffs are not rendered by default.

Binary file added data/farmm_bc.rda
Binary file not shown.
Binary file added data/farmm_samples.rda
Binary file not shown.
56 changes: 56 additions & 0 deletions man/farmm.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 14 additions & 0 deletions tests/testthat/test-farmm.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
test_that("farmm_samples data is available", {
  # Expected shape and factor levels of the packaged sample table.
  expected_columns <- c(
    "sample_id", "subject_id", "study_day", "diet", "antibiotics", "height",
    "weight", "age", "bacterial_16S_copies", "num_reads", "host_frac"
  )
  diet_levels <- c("Omnivore", "Vegan", "EEN")
  antibiotics_levels <- c("pre", "current", "post")

  expect_equal(nrow(farmm_samples), 414)
  expect_equal(colnames(farmm_samples), expected_columns)
  expect_equal(levels(farmm_samples$diet), diet_levels)
  expect_equal(levels(farmm_samples$antibiotics), antibiotics_levels)
})

test_that("farmm_bc distance matrix matches farmm_samples data frame", {
  # Convert once; the labels must follow the row order of farmm_samples.
  bc_matrix <- as.matrix(farmm_bc)
  sample_ids <- farmm_samples$sample_id

  expect_equal(dim(bc_matrix), c(414, 414))
  expect_equal(colnames(bc_matrix), sample_ids)
  expect_equal(rownames(bc_matrix), sample_ids)
})

0 comments on commit 9f9bb66

Please sign in to comment.