-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathget_data.R
160 lines (95 loc) · 3.66 KB
/
get_data.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
# install.packages(
# "tidyverse",
# "tidycensus",
# "readxl",
# "openxlsx"
# )
# After you have run the code above, please either delete the code
# or comment out the code using Ctrl-Shift-C
rm(list=ls())
library(tidyverse)
library(tidycensus)
library(readxl)
library(openxlsx)
source("funcs.r")
source("call.r")
# %%%%%%%%%%%% BEGIN MESSING WITH THIS %%%%%%%%%%%%
setwd("~/Documents/work/RKG/code/ACS-Data-Pull")
census_api_key("2f7688b42a2c229e0662079bf0f4f5400cbb7551") # YOU NEED YOUR OWN
# Parameters/FILEPATHS ----
data_path <- "~/Documents/work/RKG/data" # This is the path where "read_file" lives
read_file <- "Data Pull_NashuaNH.xlsx" # This is the file that contains codes for all tables you want to pull
var_sheet <- "Data Pull" # This specifies the sheet in "read_file" that contains the information mentioned above
# inputs ----
acs_years <-c(2013,2018) # Please pick 2 separate years
st <- "NH" # state
geo_level <- "county subdivision" # geographic level. search "tidycensus" online to see different options
cnty <- "Hillsborough County" # County
survey_type <- "acs5" # 5-year ACS, 1-year ACS, etc...
cnty_name <- str_replace(cnty," ","_") # Please do not touch this.
name <- "Nashua city"
# ******** IMPORTANT PLEASE READ **********
# CHOOSE st, cnty_name, or insert the name of a custom place you're trying to
# If you do NOT use 'cnty_name', you MUST input the name exactly, CASE SENSITIVE, as it appears
# in the data pulled from the acs
# %%%%%%%%%%% STOP MESSING WITH THIS %%%%%%%%%%%%%%%
# labels ----
labels <- read_excel(paste(data_path,read_file,sep="/"),sheet=var_sheet)
labels <- filter(labels, Source=="ACS Data")
tables <- labels[2]
# wb setup ----
wb <- loadWorkbook(paste(data_path,read_file,sep="/"))
# wb <- createWorkbook()
# loop ----
cnty_require <- c("county","county subdivision","tract","block group")
if(geo_level %in% cnty_require){cnty_val <- cnty} else {cnty_val <- NULL}
for(t in 1:nrow(tables)){
col_num <- 1
yr <- acs_years[1]
df <- get_acs(geography=geo_level,
table = tables[[t,1]],
state = st,
county = cnty_val,
cache_table = TRUE,
year = yr,
survey = survey_type)
if(geo_level == "county subdivision" | geo_level == "place"){
df <- df %>%
filter(str_detect(NAME, name))
}
vars <- load_variables(yr, survey_type, cache = TRUE)
vars <- select(vars, "name","label")
vars <- rename(vars, "variable"="name")
input_df <- inner_join(vars,df,by = "variable")
sht <- tables[[t,1]]
addWorksheet(wb,sht)
writeData(wb,sht,input_df,startCol = col_num)
call.func(t,input_df)
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
col_num <- ncol(df)+5
yr <- acs_years[2]
df_2 <- get_acs(geography=geo_level,
table = tables[[t,1]],
state = st,
county = cnty_val,
cache_table = TRUE,
year = yr,
survey = survey_type)
if(geo_level == "county subdivision" | geo_level == "place"){
df_2 <- df_2 %>%
filter(str_detect(NAME, name))
}
vars <- load_variables(yr, survey_type, cache = TRUE)
vars <- select(vars, "name","label")
vars <- rename(vars, "variable"="name")
input_df_2 <- inner_join(vars,df_2,by = "variable")
sht <- tables[[t,1]]
writeData(wb,sht,input_df_2,startCol = col_num)
call.func(t,input_df_2)
}
# save ----
write_file <- paste(name,st,"Data.xlsx",sep="_")
write_file <- paste(data_path,write_file,sep="/")
saveWorkbook(wb,write_file,overwrite = T)
print(write_file)
#If you get warnings that say "unknown or uninitialised [something]", ignore that warning