Skip to content

Commit 71cde0c

Browse files
committed
Exclude version_downloads from the database dumps.
The current implementation of the database dumps does not work for the partitioned version_downloads table. This change excludes the version_downloads table and all its partitions from the dumps.
1 parent b9d691d commit 71cde0c

File tree

3 files changed

+94
-128
lines changed

3 files changed

+94
-128
lines changed

src/tasks/dump_db/dump-db.toml

+72-111
Original file line numberDiff line numberDiff line change
@@ -2,47 +2,66 @@
22
# database table, we set which columns are included in the dump, and optionally
33
# how to filter the rows.
44
#
5-
# <table_name>.columns - a TOML dictionary determining what columns to include.
6-
# possible values are "private" (not included) and "public" (included).
5+
# tables.<table_name>.columns - a TOML dictionary determining what columns to
6+
# include. Possible values are "private" (not included) and "public"
7+
# (included).
78
#
8-
# <table_name>.filter - a string that is a valid SQL expression, which is used
9-
# in a WHERE clause to filter the rows of the table.
9+
# tables.<table_name>.filter - a string that is a valid SQL expression, which
10+
# is used in a WHERE clause to filter the rows of the table.
1011
#
11-
# <table_name>.dependencies - an array of table names, used to determine the
12-
# order of the tables in the generated import script. All tables referred
13-
# to by public columns in the current table should be listed, to make sure
14-
# they are imported before this table.
12+
# tables.<table_name>.dependencies - an array of table names, used to determine
13+
# the order of the tables in the generated import script. All tables
14+
# referred to by public columns in the current table should be listed, to
15+
# make sure they are imported before this table.
1516
#
16-
# <table_name>.columns_defaults - a TOML dictionary mapping column names to a
17-
# raw SQL expression that is used as the default value for the column on
18-
# import. This is useful for private columns that are not nullable and do
19-
# not have a default.
20-
21-
[api_tokens.columns]
22-
id = "private"
23-
user_id = "private"
24-
token = "private"
25-
name = "private"
26-
created_at = "private"
27-
last_used_at = "private"
28-
revoked = "private"
29-
30-
[background_jobs.columns]
31-
id = "private"
32-
job_type = "private"
33-
data = "private"
34-
retries = "private"
35-
last_retry = "private"
36-
created_at = "private"
37-
38-
[badges]
17+
# tables.<table_name>.column_defaults - a TOML dictionary mapping column names
18+
# to a raw SQL expression that is used as the default value for the column
19+
# on import. This is useful for private columns that are not nullable and
20+
# do not have a default.
21+
#
22+
# private_tables - an array of tables to consider as completely private. This is
23+
# a shortcut for marking all columns of a table as private.
24+
25+
private_tables = [
26+
"__diesel_schema_migrations",
27+
"api_tokens",
28+
"background_jobs",
29+
"crate_owner_invitations",
30+
"emails",
31+
"follows",
32+
"publish_limit_buckets",
33+
"publish_rate_overrides",
34+
"readme_renderings",
35+
"version_owner_actions",
36+
"versions_published_by",
37+
"version_downloads",
38+
"version_downloads_default",
39+
"version_downloads_archive",
40+
"version_downloads_pre_2017",
41+
"version_downloads_2017",
42+
"version_downloads_2018_q1",
43+
"version_downloads_2018_q2",
44+
"version_downloads_2018_q3",
45+
"version_downloads_2018_q4",
46+
"version_downloads_2019_q1",
47+
"version_downloads_2019_q2",
48+
"version_downloads_2019_q3",
49+
"version_downloads_2019_q4",
50+
"version_downloads_2020_q1",
51+
"version_downloads_2020_q2",
52+
"version_downloads_2020_q3",
53+
"version_downloads_2020_q4",
54+
"version_downloads_2021_q1",
55+
]
56+
57+
[tables.badges]
3958
dependencies = ["crates"]
40-
[badges.columns]
59+
[tables.badges.columns]
4160
crate_id = "public"
4261
badge_type = "public"
4362
attributes = "public"
4463

45-
[categories.columns]
64+
[tables.categories.columns]
4665
id = "public"
4766
category = "public"
4867
slug = "public"
@@ -51,18 +70,10 @@ crates_cnt = "public"
5170
created_at = "public"
5271
path = "public"
5372

54-
[crate_owner_invitations.columns]
55-
invited_user_id = "private"
56-
invited_by_user_id = "private"
57-
crate_id = "private"
58-
created_at = "private"
59-
token = "private"
60-
token_generated_at = "private"
61-
62-
[crate_owners]
73+
[tables.crate_owners]
6374
dependencies = ["crates", "users"]
6475
filter = "NOT deleted"
65-
[crate_owners.columns]
76+
[tables.crate_owners.columns]
6677
crate_id = "public"
6778
owner_id = "public"
6879
created_at = "public"
@@ -72,7 +83,7 @@ updated_at = "private"
7283
owner_kind = "public"
7384
email_notifications = "private"
7485

75-
[crates.columns]
86+
[tables.crates.columns]
7687
id = "public"
7788
name = "public"
7889
updated_at = "public"
@@ -86,21 +97,21 @@ textsearchable_index_col = "public"
8697
repository = "public"
8798
max_upload_size = "public"
8899

89-
[crates_categories]
100+
[tables.crates_categories]
90101
dependencies = ["categories", "crates"]
91-
[crates_categories.columns]
102+
[tables.crates_categories.columns]
92103
crate_id = "public"
93104
category_id = "public"
94105

95-
[crates_keywords]
106+
[tables.crates_keywords]
96107
dependencies = ["crates", "keywords"]
97-
[crates_keywords.columns]
108+
[tables.crates_keywords.columns]
98109
crate_id = "public"
99110
keyword_id = "public"
100111

101-
[dependencies]
112+
[tables.dependencies]
102113
dependencies = ["crates", "versions"]
103-
[dependencies.columns]
114+
[tables.dependencies.columns]
104115
id = "public"
105116
version_id = "public"
106117
crate_id = "public"
@@ -111,99 +122,53 @@ features = "public"
111122
target = "public"
112123
kind = "public"
113124

114-
[__diesel_schema_migrations.columns]
115-
version = "private"
116-
run_on = "private"
117-
118-
[emails.columns]
119-
id = "private"
120-
user_id = "private"
121-
email = "private"
122-
verified = "private"
123-
token = "private"
124-
token_generated_at = "private"
125-
126-
[follows.columns]
127-
user_id = "private"
128-
crate_id = "private"
129-
130-
[keywords.columns]
125+
[tables.keywords.columns]
131126
id = "public"
132127
keyword = "public"
133128
crates_cnt = "public"
134129
created_at = "public"
135130

136-
[metadata.columns]
131+
[tables.metadata.columns]
137132
total_downloads = "public"
138133

139-
[publish_limit_buckets.columns]
140-
user_id = "private"
141-
tokens = "private"
142-
last_refill = "private"
143-
144-
[publish_rate_overrides.columns]
145-
user_id = "private"
146-
burst = "private"
147-
148-
[readme_renderings.columns]
149-
version_id = "private"
150-
rendered_at = "private"
151-
152-
[reserved_crate_names.columns]
134+
[tables.reserved_crate_names.columns]
153135
name = "public"
154136

155-
[teams.columns]
137+
[tables.teams.columns]
156138
id = "public"
157139
login = "public"
158140
github_id = "public"
159141
name = "public"
160142
avatar = "public"
161143

162-
[users]
144+
[tables.users]
163145
filter = """
164146
id in (
165147
SELECT owner_id AS user_id FROM crate_owners WHERE NOT deleted AND owner_kind = 0
166148
UNION
167149
SELECT published_by as user_id FROM versions
168150
)"""
169-
[users.columns]
151+
[tables.users.columns]
170152
id = "public"
171153
gh_access_token = "private"
172154
gh_login = "public"
173155
name = "public"
174156
gh_avatar = "public"
175157
gh_id = "public"
176-
[users.column_defaults]
158+
[tables.users.column_defaults]
177159
gh_access_token = "''"
178160

179-
[version_authors]
161+
[tables.version_authors]
180162
dependencies = ["versions"]
181-
[version_authors.columns]
163+
[tables.version_authors.columns]
182164
id = "public"
183165
version_id = "public"
184166
user_id = "private"
185167
name = "public"
186168

187-
[version_downloads]
188-
dependencies = ["versions"]
189-
[version_downloads.columns]
190-
version_id = "public"
191-
downloads = "public"
192-
counted = "private"
193-
date = "public"
194-
processed = "private"
195-
196-
[version_owner_actions.columns]
197-
id = "private"
198-
version_id = "private"
199-
user_id = "private"
200-
api_token_id = "private"
201-
action = "private"
202-
time = "private"
203-
204-
[versions]
169+
[tables.versions]
205170
dependencies = ["crates", "users"]
206-
[versions.columns]
171+
[tables.versions.columns]
207172
id = "public"
208173
crate_id = "public"
209174
num = "public"
@@ -215,7 +180,3 @@ yanked = "public"
215180
license = "public"
216181
crate_size = "public"
217182
published_by = "public"
218-
219-
[versions_published_by.columns]
220-
version_id = "private"
221-
email = "private"

src/tasks/dump_db/gen_scripts.rs

+22-16
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use std::{
2-
collections::{BTreeMap, VecDeque},
2+
collections::{BTreeMap, HashSet, VecDeque},
33
fs::File,
44
path::Path,
55
};
@@ -75,10 +75,15 @@ impl TableConfig {
7575
}
7676
}
7777

78-
/// Maps table names to the respective configurations. Used to load `dump_db.toml`.
78+
/// Representation of the configuration file dump-db.toml.
79+
///
80+
/// tables – maps table names to the respective configurations.
81+
/// private_tables – names of tables to treat as completely private.
7982
#[derive(Clone, Debug, Default, Deserialize)]
80-
#[serde(transparent)]
81-
struct VisibilityConfig(BTreeMap<String, TableConfig>);
83+
struct VisibilityConfig {
84+
tables: BTreeMap<String, TableConfig>,
85+
private_tables: HashSet<String>,
86+
}
8287

8388
/// Subset of the configuration data to be passed on to the Handlebars template.
8489
#[derive(Debug, Serialize)]
@@ -94,7 +99,7 @@ impl VisibilityConfig {
9499
let mut result = Vec::new();
95100
let mut num_deps = BTreeMap::new();
96101
let mut rev_deps: BTreeMap<_, Vec<_>> = BTreeMap::new();
97-
for (table, config) in self.0.iter() {
102+
for (table, config) in self.tables.iter() {
98103
num_deps.insert(table.as_str(), config.dependencies.len());
99104
for dep in &config.dependencies {
100105
rev_deps
@@ -118,7 +123,7 @@ impl VisibilityConfig {
118123
}
119124
}
120125
assert_eq!(
121-
self.0.len(),
126+
self.tables.len(),
122127
result.len(),
123128
"circular dependencies in database dump configuration detected",
124129
);
@@ -129,7 +134,7 @@ impl VisibilityConfig {
129134
let tables = self
130135
.topological_sort()
131136
.into_iter()
132-
.filter_map(|table| self.0[table].handlebars_context(table))
137+
.filter_map(|table| self.tables[table].handlebars_context(table))
133138
.collect();
134139
HandlebarsContext { tables }
135140
}
@@ -161,18 +166,19 @@ mod tests {
161166
use crate::test_util::pg_connection;
162167
use diesel::prelude::*;
163168
use std::collections::HashSet;
164-
use std::iter::FromIterator;
165169

166170
/// Test whether the visibility configuration matches the schema of the
167171
/// test database.
168172
#[test]
169-
#[should_panic]
170173
fn check_visibility_config() {
171174
let conn = pg_connection();
172-
let db_columns = HashSet::<Column>::from_iter(get_db_columns(&conn));
173-
let vis_columns = toml::from_str::<VisibilityConfig>(include_str!("dump-db.toml"))
174-
.unwrap()
175-
.0
175+
let config: VisibilityConfig = toml::from_str(include_str!("dump-db.toml")).unwrap();
176+
let db_columns: HashSet<Column> = get_db_columns(&conn)
177+
.into_iter()
178+
.filter(|column| !config.private_tables.contains(&column.table_name))
179+
.collect();
180+
let vis_columns = config
181+
.tables
176182
.iter()
177183
.flat_map(|(table, config)| {
178184
config.columns.iter().map(move |(column, _)| Column {
@@ -246,19 +252,19 @@ mod tests {
246252
#[test]
247253
fn test_topological_sort() {
248254
let mut config = VisibilityConfig::default();
249-
let tables = &mut config.0;
255+
let tables = &mut config.tables;
250256
tables.insert("a".to_owned(), table_config_with_deps(&["b", "c"]));
251257
tables.insert("b".to_owned(), table_config_with_deps(&["c", "d"]));
252258
tables.insert("c".to_owned(), table_config_with_deps(&["d"]));
253-
config.0.insert("d".to_owned(), table_config_with_deps(&[]));
259+
tables.insert("d".to_owned(), table_config_with_deps(&[]));
254260
assert_eq!(config.topological_sort(), ["d", "c", "b", "a"]);
255261
}
256262

257263
#[test]
258264
#[should_panic]
259265
fn topological_sort_panics_for_cyclic_dependency() {
260266
let mut config = VisibilityConfig::default();
261-
let tables = &mut config.0;
267+
let tables = &mut config.tables;
262268
tables.insert("a".to_owned(), table_config_with_deps(&["b"]));
263269
tables.insert("b".to_owned(), table_config_with_deps(&["a"]));
264270
config.topological_sort();

src/tests/dump_db.rs

-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ use diesel::{
55
};
66

77
#[test]
8-
#[should_panic]
98
fn dump_db_and_reimport_dump() {
109
let database_url = crate::env("TEST_DATABASE_URL");
1110

0 commit comments

Comments
 (0)