Skip to content

Support relative URLs from repo-subdirectory packages #3973

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jan 3, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 25 additions & 12 deletions cargo-registry-markdown/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -245,18 +245,23 @@ static MARKDOWN_EXTENSIONS: [&str; 7] =
/// use cargo_registry_markdown::text_to_html;
///
/// let text = "[Rust](https://rust-lang.org/) is an awesome *systems programming* language!";
/// let rendered = text_to_html(text, "README.md", None);
/// let rendered = text_to_html(text, "README.md", None, None);
/// assert_eq!(rendered, "<p><a href=\"https://rust-lang.org/\" rel=\"nofollow noopener noreferrer\">Rust</a> is an awesome <em>systems programming</em> language!</p>\n");
/// ```
pub fn text_to_html(text: &str, path: &str, base_url: Option<&str>) -> String {
let path = Path::new(path);
let base_dir = path.parent().and_then(|p| p.to_str()).unwrap_or("");

if path.extension().is_none() {
pub fn text_to_html(
text: &str,
readme_path_in_pkg: &str,
base_url: Option<&str>,
pkg_path_in_vcs: Option<&str>,
) -> String {
let path_in_vcs = Path::new(pkg_path_in_vcs.unwrap_or("")).join(readme_path_in_pkg);
let base_dir = path_in_vcs.parent().and_then(|p| p.to_str()).unwrap_or("");

if path_in_vcs.extension().is_none() {
return markdown_to_html(text, base_url, base_dir);
}

if let Some(ext) = path.extension().and_then(|ext| ext.to_str()) {
if let Some(ext) = path_in_vcs.extension().and_then(|ext| ext.to_str()) {
if MARKDOWN_EXTENSIONS.contains(&ext.to_lowercase().as_str()) {
return markdown_to_html(text, base_url, base_dir);
}
Expand Down Expand Up @@ -477,30 +482,38 @@ mod tests {
"s1/s2/readme.md",
] {
assert_eq!(
text_to_html("*lobster*", f, None),
text_to_html("*lobster*", f, None, None),
"<p><em>lobster</em></p>\n"
);
}

assert_eq!(
text_to_html("*[lobster](docs/lobster)*", "readme.md", Some("https://github.com/rust-lang/test")),
text_to_html("*[lobster](docs/lobster)*", "readme.md", Some("https://github.com/rust-lang/test"), None),
"<p><em><a href=\"https://github.com/rust-lang/test/blob/HEAD/docs/lobster\" rel=\"nofollow noopener noreferrer\">lobster</a></em></p>\n"
);
assert_eq!(
text_to_html("*[lobster](docs/lobster)*", "s/readme.md", Some("https://github.com/rust-lang/test")),
text_to_html("*[lobster](docs/lobster)*", "s/readme.md", Some("https://github.com/rust-lang/test"), None),
"<p><em><a href=\"https://github.com/rust-lang/test/blob/HEAD/s/docs/lobster\" rel=\"nofollow noopener noreferrer\">lobster</a></em></p>\n"
);
assert_eq!(
text_to_html("*[lobster](docs/lobster)*", "s1/s2/readme.md", Some("https://github.com/rust-lang/test")),
text_to_html("*[lobster](docs/lobster)*", "s1/s2/readme.md", Some("https://github.com/rust-lang/test"), None),
"<p><em><a href=\"https://github.com/rust-lang/test/blob/HEAD/s1/s2/docs/lobster\" rel=\"nofollow noopener noreferrer\">lobster</a></em></p>\n"
);
assert_eq!(
text_to_html("*[lobster](docs/lobster)*", "s1/s2/readme.md", Some("https://github.com/rust-lang/test"), Some("path/in/vcs/")),
"<p><em><a href=\"https://github.com/rust-lang/test/blob/HEAD/path/in/vcs/s1/s2/docs/lobster\" rel=\"nofollow noopener noreferrer\">lobster</a></em></p>\n"
);
assert_eq!(
text_to_html("*[lobster](docs/lobster)*", "s1/s2/readme.md", Some("https://github.com/rust-lang/test"), Some("path/in/vcs")),
"<p><em><a href=\"https://github.com/rust-lang/test/blob/HEAD/path/in/vcs/s1/s2/docs/lobster\" rel=\"nofollow noopener noreferrer\">lobster</a></em></p>\n"
);
}

#[test]
fn text_to_html_renders_other_things() {
for f in &["readme.exe", "readem.org", "blah.adoc"] {
assert_eq!(
text_to_html("<script>lobster</script>\n\nis my friend\n", f, None),
text_to_html("<script>lobster</script>\n\nis my friend\n", f, None, None),
"&lt;script&gt;lobster&lt;/script&gt;<br>\n<br>\nis my friend<br>\n"
);
}
Expand Down
5 changes: 5 additions & 0 deletions src/admin/render_readmes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -205,10 +205,15 @@ fn render_pkg_readme<R: Read>(mut archive: Archive<R>, pkg_name: &str) -> anyhow
let contents = find_file_by_path(&mut entries, Path::new(&path))
.with_context(|| format!("Failed to read {} file", readme_path))?;

// pkg_path_in_vcs Unsupported from admin::render_readmes. See #4095
// Would need access to cargo_vcs_info
let pkg_path_in_vcs = None;

text_to_html(
&contents,
&readme_path,
manifest.package.repository.as_deref(),
pkg_path_in_vcs,
)
};
return Ok(rendered);
Expand Down
76 changes: 68 additions & 8 deletions src/controllers/krate/publish.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use hex::ToHex;
use sha2::{Digest, Sha256};
use std::collections::HashMap;
use std::io::Read;
use std::path::Path;
use std::sync::Arc;
use swirl::Job;

Expand All @@ -18,7 +19,7 @@ use crate::worker;

use crate::schema::*;
use crate::util::errors::{cargo_err, AppResult};
use crate::util::{read_fill, read_le_u32, LimitErrorReader, Maximums};
use crate::util::{read_fill, read_le_u32, CargoVcsInfo, LimitErrorReader, Maximums};
use crate::views::{
EncodableCrate, EncodableCrateDependency, EncodableCrateUpload, GoodCrate, PublishWarnings,
};
Expand Down Expand Up @@ -195,7 +196,8 @@ pub fn publish(req: &mut dyn RequestExt) -> EndpointResult {
LimitErrorReader::new(req.body(), maximums.max_upload_size).read_to_end(&mut tarball)?;
let hex_cksum: String = Sha256::digest(&tarball).encode_hex();
let pkg_name = format!("{}-{}", krate.name, vers);
verify_tarball(&pkg_name, &tarball, maximums.max_unpack_size)?;
let cargo_vcs_info = verify_tarball(&pkg_name, &tarball, maximums.max_unpack_size)?;
let pkg_path_in_vcs = cargo_vcs_info.map(|info| info.path_in_vcs);

if let Some(readme) = new_crate.readme {
worker::render_and_upload_readme(
Expand All @@ -205,6 +207,7 @@ pub fn publish(req: &mut dyn RequestExt) -> EndpointResult {
.readme_file
.unwrap_or_else(|| String::from("README.md")),
repo,
pkg_path_in_vcs,
)
.enqueue(&conn)?;
}
Expand Down Expand Up @@ -379,7 +382,11 @@ pub fn add_dependencies(
Ok(git_deps)
}

fn verify_tarball(pkg_name: &str, tarball: &[u8], max_unpack: u64) -> AppResult<()> {
fn verify_tarball(
pkg_name: &str,
tarball: &[u8],
max_unpack: u64,
) -> AppResult<Option<CargoVcsInfo>> {
// All our data is currently encoded with gzip
let decoder = GzDecoder::new(tarball);

Expand All @@ -389,8 +396,12 @@ fn verify_tarball(pkg_name: &str, tarball: &[u8], max_unpack: u64) -> AppResult<

// Use this I/O object now to take a peek inside
let mut archive = tar::Archive::new(decoder);

let vcs_info_path = Path::new(&pkg_name).join(".cargo_vcs_info.json");
let mut vcs_info = None;

for entry in archive.entries()? {
let entry = entry.map_err(|err| {
let mut entry = entry.map_err(|err| {
err.chain(cargo_err(
"uploaded tarball is malformed or too large when decompressed",
))
Expand All @@ -401,9 +412,15 @@ fn verify_tarball(pkg_name: &str, tarball: &[u8], max_unpack: u64) -> AppResult<
// upload a tarball that contains both `foo-0.1.0/` source code as well
// as `bar-0.1.0/` source code, and this could overwrite other crates in
// the registry!
if !entry.path()?.starts_with(&pkg_name) {
let entry_path = entry.path()?;
if !entry_path.starts_with(&pkg_name) {
return Err(cargo_err("invalid tarball uploaded"));
}
if entry_path == vcs_info_path {
let mut contents = String::new();
entry.read_to_string(&mut contents)?;
vcs_info = CargoVcsInfo::from_contents(&contents).ok();
}

// Historical versions of the `tar` crate which Cargo uses internally
// don't properly prevent hard links and symlinks from overwriting
Expand All @@ -415,7 +432,7 @@ fn verify_tarball(pkg_name: &str, tarball: &[u8], max_unpack: u64) -> AppResult<
return Err(cargo_err("invalid tarball uploaded"));
}
}
Ok(())
Ok(vcs_info)
}

#[cfg(test)]
Expand All @@ -435,14 +452,57 @@ mod tests {
#[test]
fn verify_tarball_test() {
let mut pkg = tar::Builder::new(vec![]);
add_file(&mut pkg, "foo-0.0.1/.cargo_vcs_info.json", br#"{}"#);
add_file(&mut pkg, "foo-0.0.1/Cargo.toml", b"");
let mut serialized_archive = vec![];
GzEncoder::new(pkg.into_inner().unwrap().as_slice(), Default::default())
.read_to_end(&mut serialized_archive)
.unwrap();

let limit = 512 * 1024 * 1024;
assert_ok!(verify_tarball("foo-0.0.1", &serialized_archive, limit));
assert_eq!(
verify_tarball("foo-0.0.1", &serialized_archive, limit).unwrap(),
None
);
assert_err!(verify_tarball("bar-0.0.1", &serialized_archive, limit));
}

#[test]
fn verify_tarball_test_incomplete_vcs_info() {
let mut pkg = tar::Builder::new(vec![]);
add_file(&mut pkg, "foo-0.0.1/Cargo.toml", b"");
add_file(
&mut pkg,
"foo-0.0.1/.cargo_vcs_info.json",
br#"{"unknown": "field"}"#,
);
let mut serialized_archive = vec![];
GzEncoder::new(pkg.into_inner().unwrap().as_slice(), Default::default())
.read_to_end(&mut serialized_archive)
.unwrap();
let limit = 512 * 1024 * 1024;
let vcs_info = verify_tarball("foo-0.0.1", &serialized_archive, limit)
.unwrap()
.unwrap();
assert_eq!(vcs_info.path_in_vcs, "");
}

#[test]
fn verify_tarball_test_vcs_info() {
let mut pkg = tar::Builder::new(vec![]);
add_file(&mut pkg, "foo-0.0.1/Cargo.toml", b"");
add_file(
&mut pkg,
"foo-0.0.1/.cargo_vcs_info.json",
br#"{"path_in_vcs": "path/in/vcs"}"#,
);
let mut serialized_archive = vec![];
GzEncoder::new(pkg.into_inner().unwrap().as_slice(), Default::default())
.read_to_end(&mut serialized_archive)
.unwrap();
let limit = 512 * 1024 * 1024;
let vcs_info = verify_tarball("foo-0.0.1", &serialized_archive, limit)
.unwrap()
.unwrap();
assert_eq!(vcs_info.path_in_vcs, "path/in/vcs");
}
}
43 changes: 43 additions & 0 deletions src/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,46 @@ impl Maximums {
}
}
}

/// Represents relevant contents of .cargo_vcs_info.json file when uploaded from cargo
/// or downloaded from crates.io
#[derive(Debug, Deserialize, Eq, PartialEq)]
pub struct CargoVcsInfo {
/// Path to the package within repo (empty string if root). / not \
#[serde(default)]
pub path_in_vcs: String,
}

impl CargoVcsInfo {
pub fn from_contents(contents: &str) -> serde_json::Result<Self> {
serde_json::from_str(contents)
}
}

#[cfg(test)]
mod tests {
use super::CargoVcsInfo;

#[test]
fn test_cargo_vcs_info() {
assert_eq!(CargoVcsInfo::from_contents("").ok(), None);
assert_eq!(
CargoVcsInfo::from_contents("{}").unwrap(),
CargoVcsInfo {
path_in_vcs: "".into()
}
);
assert_eq!(
CargoVcsInfo::from_contents(r#"{"path_in_vcs": "hi"}"#).unwrap(),
CargoVcsInfo {
path_in_vcs: "hi".into()
}
);
assert_eq!(
CargoVcsInfo::from_contents(r#"{"path_in_vcs": "hi", "future": "field"}"#).unwrap(),
CargoVcsInfo {
path_in_vcs: "hi".into()
}
);
}
}
8 changes: 7 additions & 1 deletion src/worker/readmes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,17 @@ pub fn render_and_upload_readme(
text: String,
readme_path: String,
base_url: Option<String>,
pkg_path_in_vcs: Option<String>,
) -> Result<(), PerformError> {
use crate::schema::*;
use diesel::prelude::*;

let rendered = text_to_html(&text, &readme_path, base_url.as_deref());
let rendered = text_to_html(
&text,
&readme_path,
base_url.as_deref(),
pkg_path_in_vcs.as_deref(),
);

conn.transaction(|| {
Version::record_readme_rendering(version_id, conn)?;
Expand Down