From 2c760aa491e7901278f37a2305c3cfb1b2e0ff03 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Tue, 20 Feb 2024 20:25:48 -0500 Subject: [PATCH] Preserve executable bit when untarring archives --- crates/uv-extract/src/stream.rs | 98 ++++++++++++++++++++++++++++++++- 1 file changed, 96 insertions(+), 2 deletions(-) diff --git a/crates/uv-extract/src/stream.rs b/crates/uv-extract/src/stream.rs index 6b5d6b62ae59a..2eca5e3e61872 100644 --- a/crates/uv-extract/src/stream.rs +++ b/crates/uv-extract/src/stream.rs @@ -1,4 +1,8 @@ -use std::path::Path; +use async_compression::tokio::write::GzipDecoder; +use futures::StreamExt; +use std::io::Read; +use std::path::{Component, Path, PathBuf}; +use std::pin::Pin; use rustc_hash::FxHashSet; use tokio_util::compat::{FuturesAsyncReadCompatExt, TokioAsyncReadCompatExt}; @@ -104,11 +108,101 @@ pub async fn untar( reader: R, target: impl AsRef, ) -> Result<(), Error> { + /// Determine the path at which the given tar entry will be unpacked, when unpacking into `dst`. + /// + /// This is effectively a vendored version of some logic in `tokio_tar`, within `unpack_in`. + fn unpacked_at(dst: &Path, entry: &Path) -> Option { + // Notes regarding bsdtar 2.8.3 / libarchive 2.8.3: + // * Leading '/'s are trimmed. For example, `///test` is treated as + // `test`. + // * If the filename contains '..', then the file is skipped when + // extracting the tarball. + // * '//' within a filename is effectively skipped. An error is + // logged, but otherwise the effect is as if any two or more + // adjacent '/'s within the filename were consolidated into one + // '/'. + // + // Most of this is handled by the `path` module of the standard + // library, but we specially handle a few cases here as well. + + let mut file_dst = dst.to_path_buf(); + { + for part in entry.components() { + match part { + // Leading '/' characters, root paths, and '.' + // components are just ignored and treated as "empty + // components" + Component::Prefix(..) | Component::RootDir | Component::CurDir => continue, + + // If any part of the filename is '..', then skip over + // unpacking the file to prevent directory traversal + // security issues. See, e.g.: CVE-2001-1267, + // CVE-2002-0399, CVE-2005-1918, CVE-2007-4131 + Component::ParentDir => return None, + + Component::Normal(part) => file_dst.push(part), + } + } + } + + // Skip cases where only slashes or '.' parts were seen, because + // this is effectively an empty filename. + if *dst == *file_dst { + return None; + } + + // Skip entries without a parent (i.e. outside of FS root) + if file_dst.parent().is_none() { + return None; + }; + + Some(file_dst) + } + + /// Unpack the given tar archive into the destination directory. + /// + /// This is equivalent to `archive.unpack_in(dst)`, but it also preserves the executable bit. + async fn unpack>( + archive: &mut tokio_tar::Archive, + dst: P, + ) -> std::io::Result<()> { + let mut entries = archive.entries()?; + let mut pinned = Pin::new(&mut entries); + while let Some(entry) = pinned.next().await { + // Unpack the file into the destination directory. + let mut file = entry?; + file.unpack_in(dst.as_ref()).await?; + + // Preserve the executable bit. + #[cfg(unix)] + { + use std::fs::Permissions; + use std::os::unix::fs::PermissionsExt; + + let mode = file.header().mode()?; + + let has_any_executable_bit = mode & 0o111; + if has_any_executable_bit != 0 { + let path = file.path()?; + if let Some(path) = unpacked_at(dst.as_ref(), &path) { + let permissions = fs_err::tokio::metadata(&path).await?.permissions(); + fs_err::tokio::set_permissions( + &path, + Permissions::from_mode(permissions.mode() | 0o111), + ) + .await?; + } + } + } + } + Ok(()) + } + let decompressed_bytes = async_compression::tokio::bufread::GzipDecoder::new(reader); let mut archive = tokio_tar::ArchiveBuilder::new(decompressed_bytes) .set_preserve_mtime(false) .build(); - Ok(archive.unpack(target.as_ref()).await?) + Ok(unpack(&mut archive, target.as_ref()).await?) } /// Unzip a `.zip` or `.tar.gz` archive into the target directory, without requiring `Seek`.