-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathGrinDataProcessing.R
55 lines (46 loc) · 1.63 KB
/
GrinDataProcessing.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
##################################################
## Project: Soybase Pedigree Visualization
## Script purpose: Process the grin_results file
## Date: 2021-04-20
## Author: Jay Gillenwater
##################################################
library(tidyverse)
library(janitor)
library(vroom)
# Load the three input tables from ./Data: the GRIN results file, the
# PI-number table, and the Soybase pedigree data.
AllResults <- vroom("./Data/grin_results.txt")

# str_remove() drops only the first space found in each PI_Num value.
# NOTE(review): presumably this lines PI_Num up with the grin_accession
# format used as the join key further down -- confirm against the data.
PINumbers <- mutate(
  vroom("./Data/PI_Numbers.csv"),
  PI_Num = str_remove(PI_Num, " ")
)

CrossData <- vroom("./Data/SoybasePedigreeData.csv")
# Standardise the column names with clean_names(), then keep only the
# accession identifier and the trait columns listed here.
AllResults_reduced <- AllResults %>%
  clean_names() %>%
  select(
    grin_accession, flower_color, height, hilum_color,
    lodging, maturity_group, oil, protein,
    pubescence_color, seed_quality, seed_weight, yield
  )
# Collapse one trait entry into a single number.
#
# An entry may hold several observations in one string separated by ";"
# (e.g. "10;20;30"). The pieces are converted to numeric and averaged;
# pieces that are empty or not numeric become NA and are ignored.
#
# TraitValue: a single trait entry (character or numeric; may be NA).
# Returns: the mean of the numeric pieces as a length-one double, or
#   NA_real_ when the entry contains no numeric value at all. mean() of an
#   empty set is NaN, so that case is handled explicitly to keep missing
#   traits as ordinary NA values.
AverageTrait <- function(TraitValue) {
  # as.character() lets entries from columns that were parsed as numeric or
  # logical pass through strsplit(), which only accepts character input.
  pieces <- unlist(strsplit(as.character(TraitValue), ";", fixed = TRUE))
  observations <- as.numeric(pieces)
  observations <- observations[!is.na(observations)]

  if (length(observations) == 0) {
    return(NA_real_)
  }
  mean(observations)
}
# Reduce each multi-valued numeric trait column to one number per row by
# applying AverageTrait() to every entry; all other columns pass through
# unchanged.
AllResults_cleaned <- AllResults_reduced %>%
  mutate(
    across(
      c(height, lodging, oil, protein, seed_quality, seed_weight, yield),
      ~ map_dbl(.x, AverageTrait)
    )
  )
# Attach the PI-number table to the cross records, then attach the averaged
# traits by matching PI_Num against grin_accession.
# NOTE(review): the first join has no `by`, so dplyr joins on every column
# name the two tables share -- confirm that is the intended key.
CrossData_withTraits <- CrossData %>%
  left_join(PINumbers) %>%
  left_join(AllResults_cleaned, by = c("PI_Num" = "grin_accession"))
# Save both results to ./Data: the cross table with traits attached, and the
# cleaned trait table on its own.
write_csv(
  CrossData_withTraits,
  "./Data/CrossData_withTraits.csv"
)
write_csv(
  AllResults_cleaned,
  "./Data/GrinTraits.csv"
)