diff --git a/crates/uv-extract/src/lib.rs b/crates/uv-extract/src/lib.rs index d9b25aeb8338..9cd38d3f0d11 100644 --- a/crates/uv-extract/src/lib.rs +++ b/crates/uv-extract/src/lib.rs @@ -4,4 +4,5 @@ pub use sync::*; mod error; pub mod stream; mod sync; +mod tar; mod vendor; diff --git a/crates/uv-extract/src/stream.rs b/crates/uv-extract/src/stream.rs index 6b5d6b62ae59..b76cdfd846f5 100644 --- a/crates/uv-extract/src/stream.rs +++ b/crates/uv-extract/src/stream.rs @@ -1,5 +1,7 @@ use std::path::Path; +use std::pin::Pin; +use futures::StreamExt; use rustc_hash::FxHashSet; use tokio_util::compat::{FuturesAsyncReadCompatExt, TokioAsyncReadCompatExt}; @@ -97,6 +99,44 @@ pub async fn unzip( Ok(()) } +/// Unpack the given tar archive into the destination directory. +/// +/// This is equivalent to `archive.unpack_in(dst)`, but it also preserves the executable bit. +async fn untar_in>( + archive: &mut tokio_tar::Archive, + dst: P, +) -> std::io::Result<()> { + let mut entries = archive.entries()?; + let mut pinned = Pin::new(&mut entries); + while let Some(entry) = pinned.next().await { + // Unpack the file into the destination directory. + let mut file = entry?; + file.unpack_in(dst.as_ref()).await?; + + // Preserve the executable bit. + #[cfg(unix)] + { + use std::fs::Permissions; + use std::os::unix::fs::PermissionsExt; + + let mode = file.header().mode()?; + + let has_any_executable_bit = mode & 0o111; + if has_any_executable_bit != 0 { + if let Some(path) = crate::tar::unpacked_at(dst.as_ref(), &file.path()?) { + let permissions = fs_err::tokio::metadata(&path).await?.permissions(); + fs_err::tokio::set_permissions( + &path, + Permissions::from_mode(permissions.mode() | 0o111), + ) + .await?; + } + } + } + } + Ok(()) +} + /// Unzip a `.tar.gz` archive into the target directory, without requiring `Seek`. /// /// This is useful for unpacking files as they're being downloaded. 
/// Determine the path at which the given tar entry will be unpacked, when unpacking into `dst`.
///
/// The entry path is sanitized component by component:
///
/// - prefixes, root directories (leading `/`), and `.` components are dropped;
/// - any `..` component rejects the entry entirely;
/// - normal components are appended to `dst`.
///
/// Returns `None` when the entry would not be unpacked: it contains a `..` component, it
/// is effectively empty (only slashes and `.`), or the resulting path has no parent.
// Only the Unix-specific permission handling calls this, so it is unused elsewhere.
// `unix` is a target *family*; `target_os = "unix"` never matches any target.
#[cfg_attr(not(unix), allow(dead_code))]
pub(crate) fn unpacked_at(dst: &Path, entry: &Path) -> Option<PathBuf> {
    let mut file_dst = dst.to_path_buf();
    for part in entry.components() {
        match part {
            // Leading '/' characters, root paths, and '.' components are just ignored
            // and treated as "empty components".
            Component::Prefix(..) | Component::RootDir | Component::CurDir => {}

            // If any part of the filename is '..', then skip over unpacking the file to
            // prevent directory traversal security issues. See, e.g.: CVE-2001-1267,
            // CVE-2002-0399, CVE-2005-1918, CVE-2007-4131.
            Component::ParentDir => return None,

            Component::Normal(part) => file_dst.push(part),
        }
    }

    // Skip cases where only slashes or '.' parts were seen, because this is effectively
    // an empty filename.
    if file_dst.as_path() == dst {
        return None;
    }

    // Skip entries without a parent (i.e. outside of FS root).
    file_dst.parent()?;

    Some(file_dst)
}