Skip to content

Commit 0b4ee63

Browse files
authored
Merge pull request #10888 from LawnGnome/invalidate-on-delete
Ensure CDN invalidations occur when crates or crate versions are deleted
2 parents eb7eda8 + ede5689 commit 0b4ee63

File tree

7 files changed

+140
-17
lines changed

7 files changed

+140
-17
lines changed

src/bin/crates-admin/delete_version.rs

+13-1
Original file line numberDiff line numberDiff line change
@@ -105,10 +105,13 @@ pub async fn run(opts: Opts) -> anyhow::Result<()> {
105105
warn!(%crate_name, "Failed to enqueue background job: {error}");
106106
}
107107

108+
let mut paths = Vec::new();
108109
for version in &opts.versions {
109110
debug!(%crate_name, %version, "Deleting crate file from S3");
110111
if let Err(error) = store.delete_crate_file(crate_name, version).await {
111112
warn!(%crate_name, %version, ?error, "Failed to delete crate file from S3");
113+
} else {
114+
paths.push(store.crate_location(crate_name, version));
112115
}
113116

114117
debug!(%crate_name, %version, "Deleting readme file from S3");
@@ -117,9 +120,18 @@ pub async fn run(opts: Opts) -> anyhow::Result<()> {
117120
Err(error) => {
118121
warn!(%crate_name, %version, ?error, "Failed to delete readme file from S3")
119122
}
120-
Ok(_) => {}
123+
Ok(_) => {
124+
paths.push(store.readme_location(crate_name, version));
125+
}
121126
}
122127
}
123128

129+
if let Err(e) = jobs::InvalidateCdns::new(paths.into_iter())
130+
.enqueue(&mut conn)
131+
.await
132+
{
133+
warn!("{crate_name}: Failed to enqueue CDN invalidation background job: {e}");
134+
}
135+
124136
Ok(())
125137
}

src/cloudfront.rs

+18-7
Original file line numberDiff line numberDiff line change
@@ -35,17 +35,28 @@ impl CloudFront {
3535
/// Invalidate a file on CloudFront
3636
///
3737
/// `path` is the path to the file to invalidate, such as `config.json`, or `re/ge/regex`
38-
#[instrument(skip(self))]
3938
pub async fn invalidate(&self, path: &str) -> anyhow::Result<()> {
40-
let path = if path.starts_with('/') {
41-
path.to_string()
42-
} else {
43-
format!("/{path}")
44-
};
39+
self.invalidate_many(vec![path.to_string()]).await
40+
}
4541

42+
/// Invalidate multiple paths on Cloudfront.
43+
#[instrument(skip(self))]
44+
pub async fn invalidate_many(&self, mut paths: Vec<String>) -> anyhow::Result<()> {
4645
let now = chrono::offset::Utc::now().timestamp_micros();
4746

48-
let paths = Paths::builder().quantity(1).items(path).build()?;
47+
// We need to ensure that paths have a starting slash.
48+
for path in paths.iter_mut() {
49+
if !path.starts_with('/') {
50+
*path = format!("/{path}");
51+
}
52+
}
53+
54+
let paths = Paths::builder()
55+
// It looks like you have to set quantity even if you provide a full blown Vec, because
56+
// reasons.
57+
.quantity(paths.len() as i32)
58+
.set_items(Some(paths))
59+
.build()?;
4960

5061
let invalidation_batch = InvalidationBatch::builder()
5162
.caller_reference(format!("{now}"))

src/storage.rs

+31-7
Original file line numberDiff line numberDiff line change
@@ -213,14 +213,16 @@ impl Storage {
213213
apply_cdn_prefix(&self.cdn_prefix, &feed_id.into()).replace('+', "%2B")
214214
}
215215

216+
/// Deletes all crate files for the given crate, returning the paths that were deleted.
216217
#[instrument(skip(self))]
217-
pub async fn delete_all_crate_files(&self, name: &str) -> Result<()> {
218+
pub async fn delete_all_crate_files(&self, name: &str) -> Result<Vec<Path>> {
218219
let prefix = format!("{PREFIX_CRATES}/{name}").into();
219220
self.delete_all_with_prefix(&prefix).await
220221
}
221222

223+
/// Deletes all READMEs for the given crate, returning the paths that were deleted.
222224
#[instrument(skip(self))]
223-
pub async fn delete_all_readmes(&self, name: &str) -> Result<()> {
225+
pub async fn delete_all_readmes(&self, name: &str) -> Result<Vec<Path>> {
224226
let prefix = format!("{PREFIX_READMES}/{name}").into();
225227
self.delete_all_with_prefix(&prefix).await
226228
}
@@ -333,16 +335,24 @@ impl Storage {
333335
self.store.clone()
334336
}
335337

336-
async fn delete_all_with_prefix(&self, prefix: &Path) -> Result<()> {
338+
async fn delete_all_with_prefix(&self, prefix: &Path) -> Result<Vec<Path>> {
337339
let objects = self.store.list(Some(prefix));
338-
let locations = objects.map(|meta| meta.map(|m| m.location)).boxed();
340+
let mut paths = Vec::new();
341+
let locations = objects
342+
.map(|meta| meta.map(|m| m.location))
343+
.inspect(|r| {
344+
if let Ok(path) = r {
345+
paths.push(path.clone());
346+
}
347+
})
348+
.boxed();
339349

340350
self.store
341351
.delete_stream(locations)
342352
.try_collect::<Vec<_>>()
343353
.await?;
344354

345-
Ok(())
355+
Ok(paths)
346356
}
347357

348358
fn attrs(&self, slice: impl IntoIterator<Item = (Attribute, &'static str)>) -> Attributes {
@@ -505,7 +515,14 @@ mod tests {
505515
async fn delete_all_crate_files() {
506516
let storage = prepare().await;
507517

508-
storage.delete_all_crate_files("foo").await.unwrap();
518+
let deleted_files = storage.delete_all_crate_files("foo").await.unwrap();
519+
assert_eq!(
520+
deleted_files,
521+
vec![
522+
"crates/foo/foo-1.0.0.crate".into(),
523+
"crates/foo/foo-1.2.3.crate".into(),
524+
]
525+
);
509526

510527
let expected_files = vec![
511528
"crates/bar/bar-2.0.0.crate",
@@ -520,7 +537,14 @@ mod tests {
520537
async fn delete_all_readmes() {
521538
let storage = prepare().await;
522539

523-
storage.delete_all_readmes("foo").await.unwrap();
540+
let deleted_files = storage.delete_all_readmes("foo").await.unwrap();
541+
assert_eq!(
542+
deleted_files,
543+
vec![
544+
"readmes/foo/foo-1.0.0.html".into(),
545+
"readmes/foo/foo-1.2.3.html".into(),
546+
]
547+
);
524548

525549
let expected_files = vec![
526550
"crates/bar/bar-2.0.0.crate",

src/worker/jobs/delete_crate.rs

+18-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
use crate::storage::FeedId;
22
use crate::worker::Environment;
3+
use crate::worker::jobs::InvalidateCdns;
34
use anyhow::Context;
45
use crates_io_worker::BackgroundJob;
56
use std::sync::Arc;
@@ -25,8 +26,9 @@ impl BackgroundJob for DeleteCrateFromStorage {
2526

2627
async fn run(&self, ctx: Self::Context) -> anyhow::Result<()> {
2728
let name = &self.name;
29+
let feed_id = FeedId::Crate { name };
2830

29-
try_join!(
31+
let (crate_file_paths, readme_paths, _) = try_join!(
3032
async {
3133
info!("{name}: Deleting crate files from S3…");
3234
let result = ctx.storage.delete_all_crate_files(name).await;
@@ -39,13 +41,27 @@ impl BackgroundJob for DeleteCrateFromStorage {
3941
},
4042
async {
4143
info!("{name}: Deleting RSS feed from S3…");
42-
let feed_id = FeedId::Crate { name };
4344
let result = ctx.storage.delete_feed(&feed_id).await;
4445
result.context("Failed to delete RSS feed from S3")
4546
}
4647
)?;
4748

4849
info!("{name}: Successfully deleted crate from S3");
50+
51+
info!("{name}: Enqueuing CDN invalidations");
52+
53+
let mut conn = ctx.deadpool.get().await?;
54+
InvalidateCdns::new(
55+
crate_file_paths
56+
.into_iter()
57+
.chain(readme_paths.into_iter())
58+
.chain(std::iter::once(object_store::path::Path::from(&feed_id))),
59+
)
60+
.enqueue(&mut conn)
61+
.await?;
62+
63+
info!("{name}: Successfully enqueued CDN invalidations.");
64+
4965
Ok(())
5066
}
5167
}

src/worker/jobs/invalidate_cdns.rs

+57
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
use std::sync::Arc;
2+
3+
use anyhow::Context;
4+
use crates_io_worker::BackgroundJob;
5+
6+
use crate::worker::Environment;
7+
8+
/// A background job that invalidates the given paths on all CDNs in use on crates.io.
9+
#[derive(Deserialize, Serialize)]
10+
pub struct InvalidateCdns {
11+
paths: Vec<String>,
12+
}
13+
14+
impl InvalidateCdns {
15+
pub fn new<I>(paths: I) -> Self
16+
where
17+
I: Iterator,
18+
I::Item: ToString,
19+
{
20+
Self {
21+
paths: paths.map(|path| path.to_string()).collect(),
22+
}
23+
}
24+
}
25+
26+
impl BackgroundJob for InvalidateCdns {
27+
const JOB_NAME: &'static str = "invalidate_cdns";
28+
29+
type Context = Arc<Environment>;
30+
31+
async fn run(&self, ctx: Self::Context) -> anyhow::Result<()> {
32+
// Fastly doesn't provide an API to purge multiple paths at once, except through the use of
33+
// surrogate keys. We can't use surrogate keys right now because they require a
34+
// Fastly-specific header, and not all of our traffic goes through Fastly.
35+
//
36+
// For now, we won't parallelise: most crate deletions are for new crates with one (or very
37+
// few) versions, so the actual number of paths being invalidated is likely to be small, and
38+
// this is all happening from either a background job or admin command anyway.
39+
if let Some(fastly) = ctx.fastly() {
40+
for path in self.paths.iter() {
41+
fastly
42+
.invalidate(path)
43+
.await
44+
.with_context(|| format!("Failed to invalidate path on Fastly CDN: {path}"))?;
45+
}
46+
}
47+
48+
if let Some(cloudfront) = ctx.cloudfront() {
49+
cloudfront
50+
.invalidate_many(self.paths.clone())
51+
.await
52+
.context("Failed to invalidate paths on CloudFront CDN")?;
53+
}
54+
55+
Ok(())
56+
}
57+
}

src/worker/jobs/mod.rs

+2
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ pub mod dump_db;
66
mod expiry_notification;
77
mod index;
88
mod index_version_downloads_archive;
9+
mod invalidate_cdns;
910
mod readmes;
1011
pub mod rss;
1112
mod send_publish_notifications;
@@ -23,6 +24,7 @@ pub use self::dump_db::DumpDb;
2324
pub use self::expiry_notification::SendTokenExpiryNotifications;
2425
pub use self::index::{NormalizeIndex, SquashIndex, SyncToGitIndex, SyncToSparseIndex};
2526
pub use self::index_version_downloads_archive::IndexVersionDownloadsArchive;
27+
pub use self::invalidate_cdns::InvalidateCdns;
2628
pub use self::readmes::RenderAndUploadReadme;
2729
pub use self::send_publish_notifications::SendPublishNotificationsJob;
2830
pub use self::sync_admins::SyncAdmins;

src/worker/mod.rs

+1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ impl RunnerExt for Runner<Arc<Environment>> {
2626
.register_job_type::<jobs::DeleteCrateFromStorage>()
2727
.register_job_type::<jobs::DumpDb>()
2828
.register_job_type::<jobs::IndexVersionDownloadsArchive>()
29+
.register_job_type::<jobs::InvalidateCdns>()
2930
.register_job_type::<jobs::NormalizeIndex>()
3031
.register_job_type::<jobs::ProcessCdnLog>()
3132
.register_job_type::<jobs::ProcessCdnLogQueue>()

0 commit comments

Comments
 (0)