Skip to content

Commit

Permalink
Merge pull request #80 from rbtcollins/rbt/darwin-performance
Browse files Browse the repository at this point in the history
Fix performance on MacOS
  • Loading branch information
rbtcollins authored Nov 22, 2024
2 parents e750921 + 92a0f1a commit c18afff
Show file tree
Hide file tree
Showing 4 changed files with 232 additions and 113 deletions.
199 changes: 113 additions & 86 deletions src/_impl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,13 @@ cfg_if::cfg_if! {
}
}

pub(crate) fn default_parallel_mode() -> ParallelMode {
#[cfg(all(feature = "parallel", not(target_os = "macos")))]
return ParallelMode::Parallel;
#[cfg(any(not(feature = "parallel"), target_os = "macos"))]
return ParallelMode::Serial;
}

impl super::RemoveDir for std::fs::File {
fn remove_dir_contents(&mut self, debug_root: Option<&Path>) -> Result<()> {
// thunk over to the free version adding in the os-specific IO trait impl
Expand Down Expand Up @@ -62,129 +69,149 @@ pub(crate) fn _remove_dir_contents_path<I: io::Io, P: AsRef<Path>>(path: P) -> R
/// exterior lifetime interface to dir removal
fn _remove_dir_contents<I: io::Io>(d: &mut File, debug_root: &PathComponents<'_>) -> Result<()> {
let owned_handle = I::duplicate_fd(d)?;
remove_dir_contents_recursive::<I>(owned_handle, debug_root)?;
remove_dir_contents_recursive::<I>(owned_handle, debug_root, default_parallel_mode())?;
Ok(())
}

/// deprecated interface
pub(crate) fn remove_dir_all_path<I: io::Io, P: AsRef<Path>>(path: P) -> Result<()> {
pub(crate) fn remove_dir_all_path<I: io::Io, P: AsRef<Path>>(
path: P,
parallel: ParallelMode,
) -> Result<()> {
let p = path.as_ref();
// Opportunity 1 for races
let d = I::open_dir(p)?;
let debug_root = PathComponents::Path(if p.has_root() { p } else { Path::new(".") });
remove_dir_contents_recursive::<OsIo>(d, &debug_root)?;
remove_dir_contents_recursive::<OsIo>(d, &debug_root, parallel)?;
// Opportunity 2 for races
std::fs::remove_dir(&path)?;
#[cfg(feature = "log")]
log::trace!("removed {}", &debug_root);
Ok(())
}

use crate::RemoveDir;
use crate::{ParallelMode, RemoveDir};

use self::path_components::PathComponents;

// Core workhorse, heading towards this being able to be tasks.
#[allow(clippy::map_identity)]
fn remove_dir_contents_recursive<I: io::Io>(
mut d: File,
debug_root: &PathComponents<'_>,
parallel: ParallelMode,
) -> Result<File> {
#[cfg(feature = "log")]
log::trace!("scanning {}", &debug_root);
// We take a os level clone of the FD so that there are no lifetime
// concerns. It would *not* be ok to do readdir on one file twice
// concurrently because of shared kernel state.
// We take a os level clone of the FD so that there are no rust-level
// lifetime concerns. It would *not* be ok to do readdir on one file twice
// (even via the cloned FD) concurrently because of shared kernel state: the
// readdir state is stored per file, not per FD.
let dirfd = I::duplicate_fd(&mut d)?;
cfg_if::cfg_if! {
if #[cfg(feature = "parallel")] {
match parallel {
ParallelMode::Serial => {
let mut iter = fs_at::read_dir(&mut d)?;
iter.try_for_each(|dir_entry| {
scan_and_remove_entry_recursively::<I>(debug_root, &dirfd, dir_entry, parallel)
})?;
}
#[cfg(feature = "parallel")]
_ => {
let iter = fs_at::read_dir(&mut d)?;
let iter = iter.par_bridge();
} else {
let mut iter = fs_at::read_dir(&mut d)?;
iter.try_for_each(|dir_entry| {
scan_and_remove_entry_recursively::<I>(debug_root, &dirfd, dir_entry, parallel)
})?;
}
}
};

iter.try_for_each(|dir_entry| -> Result<()> {
let dir_entry = dir_entry?;
let name = dir_entry.name();
if name == OsStr::new(".") || name == OsStr::new("..") {
return Ok(());
#[cfg(feature = "log")]
log::trace!("scanned {}", &debug_root);
Ok(dirfd)
}

#[allow(clippy::map_identity)]
fn scan_and_remove_entry_recursively<I: io::Io>(
debug_root: &PathComponents<'_>,
dirfd: &File,
dir_entry: Result<fs_at::DirEntry>,
parallel: ParallelMode,
) -> Result<()> {
let dir_entry = dir_entry?;
let name = dir_entry.name();
if name == OsStr::new(".") || name == OsStr::new("..") {
return Ok(());
}
let dir_path = Path::new(name);
let dir_debug_root = PathComponents::Component(debug_root, dir_path);
#[cfg(windows)]
{
// On windows: open the file and then decide what to do with it.
let mut opts = fs_at::OpenOptions::default();
// Could possibly drop a syscall by dropping FILE_READ_ATTRIBUTES
// and trusting read_dir metadata more. OTOH that would introduce a
// race :/.
opts.desired_access(DELETE | FILE_LIST_DIRECTORY | FILE_READ_ATTRIBUTES);
let mut child_file = opts.open_path_at(dirfd, name)?;
let metadata = child_file.metadata()?;
let is_dir = metadata.is_dir();
let is_symlink = metadata.is_symlink();
if is_dir && !is_symlink {
remove_dir_contents_recursive::<I>(
I::duplicate_fd(&mut child_file)?,
&dir_debug_root,
parallel,
)?;
}
let dir_path = Path::new(name);
let dir_debug_root = PathComponents::Component(debug_root, dir_path);
#[cfg(windows)]
{
// On windows: open the file and then decide what to do with it.
let mut opts = fs_at::OpenOptions::default();
// Could possibly drop a syscall by dropping FILE_READ_ATTRIBUTES
// and trusting read_dir metadata more. OTOH that would introduce a
// race :/.
opts.desired_access(DELETE | FILE_LIST_DIRECTORY | FILE_READ_ATTRIBUTES);
let mut child_file = opts.open_path_at(&dirfd, name)?;
let metadata = child_file.metadata()?;
let is_dir = metadata.is_dir();
let is_symlink = metadata.is_symlink();
if is_dir && !is_symlink {
remove_dir_contents_recursive::<I>(
I::duplicate_fd(&mut child_file)?,
&dir_debug_root,
)?;
#[cfg(feature = "log")]
log::trace!("delete: {}", &dir_debug_root);
child_file.delete_by_handle().map_err(|(_f, e)| {
#[cfg(feature = "log")]
log::debug!("error removing {}", dir_debug_root);
e
})?;
}
#[cfg(not(windows))]
{
// Otherwise, open the path safely but normally, fstat to see if its
// a dir, then either unlink or recursively delete
let mut opts = fs_at::OpenOptions::default();
opts.read(true)
.write(fs_at::OpenOptionsWriteMode::Write)
.follow(false);
let child_result = opts.open_dir_at(dirfd, name);
let is_dir = match child_result {
// We expect is_eloop to be the only error
Err(e) if !I::is_eloop(&e) => return Err(e),
Err(_) => false,
Ok(child_file) => {
let metadata = child_file.metadata()?;
let is_dir = metadata.is_dir();
if is_dir {
remove_dir_contents_recursive::<I>(child_file, &dir_debug_root, parallel)?;
#[cfg(feature = "log")]
log::trace!("rmdir: {}", &dir_debug_root);
let opts = fs_at::OpenOptions::default();
opts.rmdir_at(dirfd, name).map_err(|e| {
#[cfg(feature = "log")]
log::debug!("error removing {}", dir_debug_root);
e
})?;
}
is_dir
}
};
if !is_dir {
#[cfg(feature = "log")]
log::trace!("delete: {}", &dir_debug_root);
child_file.delete_by_handle().map_err(|(_f, e)| {
log::trace!("unlink: {}", &dir_debug_root);
opts.unlink_at(dirfd, name).map_err(|e| {
#[cfg(feature = "log")]
log::debug!("error removing {}", dir_debug_root);
e
})?;
}
#[cfg(not(windows))]
{
// Otherwise, open the path safely but normally, fstat to see if its
// a dir, then either unlink or recursively delete
let mut opts = fs_at::OpenOptions::default();
opts.read(true)
.write(fs_at::OpenOptionsWriteMode::Write)
.follow(false);
let child_result = opts.open_dir_at(&dirfd, name);
let is_dir = match child_result {
// We expect is_eloop to be the only error
Err(e) if !I::is_eloop(&e) => return Err(e),
Err(_) => false,
Ok(child_file) => {
let metadata = child_file.metadata()?;
let is_dir = metadata.is_dir();
if is_dir {
remove_dir_contents_recursive::<I>(child_file, &dir_debug_root)?;
#[cfg(feature = "log")]
log::trace!("rmdir: {}", &dir_debug_root);
let opts = fs_at::OpenOptions::default();
opts.rmdir_at(&dirfd, name).map_err(|e| {
#[cfg(feature = "log")]
log::debug!("error removing {}", dir_debug_root);
e
})?;
}
is_dir
}
};
if !is_dir {
#[cfg(feature = "log")]
log::trace!("unlink: {}", &dir_debug_root);
opts.unlink_at(&dirfd, name).map_err(|e| {
#[cfg(feature = "log")]
log::debug!("error removing {}", dir_debug_root);
e
})?;
}
}
#[cfg(feature = "log")]
log::trace!("removed {}", dir_debug_root);

Ok(())
})?;
}
#[cfg(feature = "log")]
log::trace!("scanned {}", &debug_root);
Ok(dirfd)
log::trace!("removed {}", dir_debug_root);

Ok(())
}
1 change: 1 addition & 0 deletions src/_impl/io.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ pub(crate) trait Io {
fn open_dir(p: &Path) -> io::Result<File>;

#[cfg(not(windows))]
#[allow(dead_code)]
fn unique_identifier(d: &File) -> io::Result<Self::UniqueIdentifier>;

#[cfg(not(windows))]
Expand Down
22 changes: 20 additions & 2 deletions src/bin/remove-dir-all.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
use std::{io::Result, path::PathBuf};

use clap::Parser;
use clap::{Parser, ValueEnum};

/// What kind of parallelism to use
#[derive(Debug, Clone, Copy, ValueEnum)]
enum Parallelism {
/// No operations in parallel
Serial,
/// Parallelise readdir and unlink operations
Parallel,
}

/// Simple CLI to use remove-dir-alls recursive deletion logic from the command
/// line.
Expand All @@ -10,14 +19,23 @@ struct Cli {
/// Paths to delete
#[arg(value_name = "FILE")]
names: Vec<PathBuf>,
/// Choose the parallelism strategy
#[arg(short = 'p', long = "parallelism")]
parallelism: Option<Parallelism>,
}

fn main() -> Result<()> {
env_logger::init();
let cli = Cli::parse();

let remover = match cli.parallelism {
None => remove_dir_all::RemoverBuilder::new().build(),
Some(Parallelism::Serial) => remove_dir_all::RemoverBuilder::new().serial().build(),
Some(Parallelism::Parallel) => remove_dir_all::RemoverBuilder::new().parallel().build(),
};

for p in cli.names {
remove_dir_all::remove_dir_all(p)?;
remover.remove_dir_all(&p)?;
}
Ok(())
}
Loading

0 comments on commit c18afff

Please sign in to comment.