Skip to content

Commit

Permalink
Preserve .git suffixes and casing in Git dependencies (#2789)
Browse files Browse the repository at this point in the history
## Summary

I noticed in #2769 that I was now stripping `.git` suffixes from Git
URLs after resolving to a precise commit. This PR cleans up the internal
caching to use a better canonical representation: a `RepositoryUrl`
along with a `GitReference`, instead of a `GitUrl` which can contain
non-canonical data. This gives us both better fidelity (preserving the
`.git`, along with any casing that the user provided when defining the
URL) and is overall cleaner and more robust.
  • Loading branch information
charliermarsh authored Apr 3, 2024
1 parent c30a65e commit 684f790
Show file tree
Hide file tree
Showing 7 changed files with 104 additions and 54 deletions.
6 changes: 6 additions & 0 deletions crates/cache-key/src/canonical_url.rs
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,12 @@ impl Deref for RepositoryUrl {
}
}

impl std::fmt::Display for RepositoryUrl {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
std::fmt::Display::fmt(&self.0, f)
}
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
2 changes: 1 addition & 1 deletion crates/distribution-types/src/direct_url.rs
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ impl TryFrom<&DirectGitUrl> for pypi_types::DirectUrl {
vcs_info: pypi_types::VcsInfo {
vcs: pypi_types::VcsKind::Git,
commit_id: value.url.precise().as_ref().map(ToString::to_string),
requested_revision: value.url.reference().map(ToString::to_string),
requested_revision: value.url.reference().as_str().map(ToString::to_string),
},
subdirectory: value.subdirectory.clone(),
})
Expand Down
98 changes: 64 additions & 34 deletions crates/uv-distribution/src/git.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@ use rustc_hash::FxHashMap;
use tracing::debug;
use url::Url;

use cache_key::CanonicalUrl;
use cache_key::{CanonicalUrl, RepositoryUrl};
use distribution_types::DirectGitUrl;
use uv_cache::{Cache, CacheBucket};
use uv_fs::LockedFile;
use uv_git::{Fetch, GitSource, GitUrl};
use uv_git::{Fetch, GitReference, GitSha, GitSource, GitUrl};

use crate::error::Error;
use crate::reporter::Facade;
Expand All @@ -24,7 +24,25 @@ use crate::Reporter;
/// consistent across all invocations. (For example: if a Git URL refers to a branch, like `main`,
/// then the resolved URL should always refer to the same commit across the lifetime of the
/// process.)
static RESOLVED_GIT_REFS: Lazy<Mutex<FxHashMap<GitUrl, GitUrl>>> = Lazy::new(Mutex::default);
static RESOLVED_GIT_REFS: Lazy<Mutex<FxHashMap<RepositoryReference, GitSha>>> =
Lazy::new(Mutex::default);

#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct RepositoryReference {
/// The URL of the Git repository, with any query parameters and fragments removed.
url: RepositoryUrl,
/// The reference to the commit to use, which could be a branch, tag or revision.
reference: GitReference,
}

impl RepositoryReference {
fn new(git: &GitUrl) -> Self {
Self {
url: RepositoryUrl::new(git.repository()),
reference: git.reference().clone(),
}
}
}

/// Download a source distribution from a Git repository.
pub(crate) async fn fetch_git_archive(
Expand All @@ -40,10 +58,10 @@ pub(crate) async fn fetch_git_archive(
fs::create_dir_all(&lock_dir)
.await
.map_err(Error::CacheWrite)?;
let canonical_url = CanonicalUrl::new(url);
let repository_url = RepositoryUrl::new(url);
let _lock = LockedFile::acquire(
lock_dir.join(cache_key::digest(&canonical_url)),
&canonical_url,
lock_dir.join(cache_key::digest(&repository_url)),
&repository_url,
)
.map_err(Error::CacheWrite)?;

Expand All @@ -52,8 +70,9 @@ pub(crate) async fn fetch_git_archive(
// Extract the resolved URL from the in-memory cache, to save a look-up in the fetch.
let url = {
let resolved_git_refs = RESOLVED_GIT_REFS.lock().unwrap();
if let Some(resolved) = resolved_git_refs.get(&url) {
resolved.clone()
let reference = RepositoryReference::new(&url);
if let Some(resolved) = resolved_git_refs.get(&reference) {
url.with_precise(*resolved)
} else {
url
}
Expand All @@ -70,10 +89,12 @@ pub(crate) async fn fetch_git_archive(
.map_err(Error::Git)?;

// Insert the resolved URL into the in-memory cache.
{
let mut resolved_git_refs = RESOLVED_GIT_REFS.lock().unwrap();
let precise = fetch.git().clone();
resolved_git_refs.insert(url, precise);
if url.precise().is_none() {
if let Some(precise) = fetch.git().precise() {
let mut resolved_git_refs = RESOLVED_GIT_REFS.lock().unwrap();
let reference = RepositoryReference::new(&url);
resolved_git_refs.insert(reference, precise);
}
}

Ok((fetch, subdirectory))
Expand All @@ -92,8 +113,7 @@ pub(crate) async fn resolve_precise(
cache: &Cache,
reporter: Option<&Arc<dyn Reporter>>,
) -> Result<Option<Url>, Error> {
let url = Url::from(CanonicalUrl::new(url));
let DirectGitUrl { url, subdirectory } = DirectGitUrl::try_from(&url).map_err(Error::Git)?;
let DirectGitUrl { url, subdirectory } = DirectGitUrl::try_from(url).map_err(Error::Git)?;

// If the Git reference already contains a complete SHA, short-circuit.
if url.precise().is_some() {
Expand All @@ -103,9 +123,10 @@ pub(crate) async fn resolve_precise(
// If the Git reference is in the in-memory cache, return it.
{
let resolved_git_refs = RESOLVED_GIT_REFS.lock().unwrap();
if let Some(precise) = resolved_git_refs.get(&url) {
let reference = RepositoryReference::new(&url);
if let Some(precise) = resolved_git_refs.get(&reference) {
return Ok(Some(Url::from(DirectGitUrl {
url: precise.clone(),
url: url.with_precise(*precise),
subdirectory,
})));
}
Expand All @@ -123,17 +144,18 @@ pub(crate) async fn resolve_precise(
let fetch = tokio::task::spawn_blocking(move || source.fetch())
.await?
.map_err(Error::Git)?;
let precise = fetch.into_git();
let git = fetch.into_git();

// Insert the resolved URL into the in-memory cache.
{
if let Some(precise) = git.precise() {
let mut resolved_git_refs = RESOLVED_GIT_REFS.lock().unwrap();
resolved_git_refs.insert(url.clone(), precise.clone());
let reference = RepositoryReference::new(&url);
resolved_git_refs.insert(reference, precise);
}

// Re-encode as a URL.
Ok(Some(Url::from(DirectGitUrl {
url: precise,
url: git,
subdirectory,
})))
}
Expand All @@ -153,7 +175,7 @@ pub fn is_same_reference<'a>(a: &'a Url, b: &'a Url) -> bool {
fn is_same_reference_impl<'a>(
a: &'a Url,
b: &'a Url,
resolved_refs: &FxHashMap<GitUrl, GitUrl>,
resolved_refs: &FxHashMap<RepositoryReference, GitSha>,
) -> bool {
// Convert `a` to a Git URL, if possible.
let Ok(a_git) = DirectGitUrl::try_from(&Url::from(CanonicalUrl::new(a))) else {
Expand All @@ -170,29 +192,35 @@ fn is_same_reference_impl<'a>(
return false;
}

// Convert `a` to a repository URL.
let a_ref = RepositoryReference::new(&a_git.url);

// Convert `b` to a repository URL.
let b_ref = RepositoryReference::new(&b_git.url);

// The URLs must refer to the same repository.
if a_git.url.repository() != b_git.url.repository() {
if a_ref.url != b_ref.url {
return false;
}

// If the URLs have the same tag, they refer to the same commit.
if a_git.url.reference() == b_git.url.reference() {
if a_ref.reference == b_ref.reference {
return true;
}

// Otherwise, the URLs must resolve to the same precise commit.
let Some(a_precise) = a_git
.url
.precise()
.or_else(|| resolved_refs.get(&a_git.url).and_then(GitUrl::precise))
.or_else(|| resolved_refs.get(&a_ref).copied())
else {
return false;
};

let Some(b_precise) = b_git
.url
.precise()
.or_else(|| resolved_refs.get(&b_git.url).and_then(GitUrl::precise))
.or_else(|| resolved_refs.get(&b_ref).copied())
else {
return false;
};
Expand All @@ -204,9 +232,11 @@ fn is_same_reference_impl<'a>(
mod tests {
use anyhow::Result;
use rustc_hash::FxHashMap;
use std::str::FromStr;
use url::Url;

use uv_git::GitUrl;
use crate::git::RepositoryReference;
use uv_git::{GitSha, GitUrl};

#[test]
fn same_reference() -> Result<()> {
Expand Down Expand Up @@ -244,10 +274,10 @@ mod tests {
)?;
let mut resolved_refs = FxHashMap::default();
resolved_refs.insert(
GitUrl::try_from(Url::parse("https://example.com/MyProject@main")?)?,
GitUrl::try_from(Url::parse(
"https://example.com/MyProject@164a8735b081663fede48c5041667b194da15d25",
)?)?,
RepositoryReference::new(&GitUrl::try_from(Url::parse(
"https://example.com/MyProject@main",
)?)?),
GitSha::from_str("164a8735b081663fede48c5041667b194da15d25")?,
);
assert!(super::is_same_reference_impl(&a, &b, &resolved_refs));

Expand All @@ -258,10 +288,10 @@ mod tests {
)?;
let mut resolved_refs = FxHashMap::default();
resolved_refs.insert(
GitUrl::try_from(Url::parse("https://example.com/MyProject@main")?)?,
GitUrl::try_from(Url::parse(
"https://example.com/MyProject@f2c9e88f3ec9526bbcec68d150b176d96a750aba",
)?)?,
RepositoryReference::new(&GitUrl::try_from(Url::parse(
"https://example.com/MyProject@main",
)?)?),
GitSha::from_str("f2c9e88f3ec9526bbcec68d150b176d96a750aba")?,
);
assert!(!super::is_same_reference_impl(&a, &b, &resolved_refs));

Expand Down
30 changes: 26 additions & 4 deletions crates/uv-git/src/git.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ const CHECKOUT_READY_LOCK: &str = ".ok";

/// A reference to commit or commit-ish.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub(crate) enum GitReference {
pub enum GitReference {
/// From a branch.
#[allow(unused)]
Branch(String),
Expand Down Expand Up @@ -66,8 +66,29 @@ impl GitReference {
}
}

/// Views the short ID as a `str`.
pub(crate) fn as_str(&self) -> &str {
pub fn precise(&self) -> Option<&str> {
match self {
Self::FullCommit(rev) => Some(rev),
Self::ShortCommit(rev) => Some(rev),
_ => None,
}
}

/// Converts the [`GitReference`] to a `str`.
pub fn as_str(&self) -> Option<&str> {
match self {
Self::Branch(rev) => Some(rev),
Self::Tag(rev) => Some(rev),
Self::BranchOrTag(rev) => Some(rev),
Self::FullCommit(rev) => Some(rev),
Self::ShortCommit(rev) => Some(rev),
Self::Ref(rev) => Some(rev),
Self::DefaultBranch => None,
}
}

/// Converts the [`GitReference`] to a `str` that can be used as a revision.
pub(crate) fn as_rev(&self) -> &str {
match self {
Self::Branch(rev)
| Self::Tag(rev)
Expand All @@ -79,6 +100,7 @@ impl GitReference {
}
}

/// Returns the kind of this reference.
pub(crate) fn kind_str(&self) -> &str {
match self {
Self::Branch(_) => "branch",
Expand Down Expand Up @@ -1034,7 +1056,7 @@ pub(crate) fn fetch(
format!(
"failed to fetch {} `{}`",
reference.kind_str(),
reference.as_str()
reference.as_rev()
)
}),
}
Expand Down
16 changes: 4 additions & 12 deletions crates/uv-git/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use std::str::FromStr;
use url::Url;

use crate::git::GitReference;
pub use crate::git::GitReference;
pub use crate::sha::GitSha;
pub use crate::source::{Fetch, GitSource, Reporter};

Expand All @@ -24,7 +24,7 @@ pub struct GitUrl {

impl GitUrl {
#[must_use]
pub(crate) fn with_precise(mut self, precise: GitSha) -> Self {
pub fn with_precise(mut self, precise: GitSha) -> Self {
self.precise = Some(precise);
self
}
Expand All @@ -35,16 +35,8 @@ impl GitUrl {
}

/// Return the reference to the commit to use, which could be a branch, tag or revision.
pub fn reference(&self) -> Option<&str> {
match &self.reference {
GitReference::Branch(rev)
| GitReference::Tag(rev)
| GitReference::BranchOrTag(rev)
| GitReference::Ref(rev)
| GitReference::FullCommit(rev)
| GitReference::ShortCommit(rev) => Some(rev),
GitReference::DefaultBranch => None,
}
pub fn reference(&self) -> &GitReference {
&self.reference
}

/// Returns `true` if the reference is a full commit.
Expand Down
2 changes: 1 addition & 1 deletion crates/uv-git/src/source.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ impl GitSource {

// Report the checkout operation to the reporter.
let task = self.reporter.as_ref().map(|reporter| {
reporter.on_checkout_start(remote.url(), self.git.reference.as_str())
reporter.on_checkout_start(remote.url(), self.git.reference.as_rev())
});

let (db, actual_rev) = remote.checkout(
Expand Down
4 changes: 2 additions & 2 deletions crates/uv/tests/pip_sync.rs
Original file line number Diff line number Diff line change
Expand Up @@ -569,7 +569,7 @@ fn install_git_tag() -> Result<()> {

let requirements_txt = context.temp_dir.child("requirements.txt");
requirements_txt.touch()?;
requirements_txt.write_str("werkzeug @ git+https://github.com/pallets/werkzeug.git@2.0.0")?;
requirements_txt.write_str("werkzeug @ git+https://github.com/pallets/WerkZeug.git@2.0.0")?;

uv_snapshot!(command(&context)
.arg("requirements.txt")
Expand All @@ -582,7 +582,7 @@ fn install_git_tag() -> Result<()> {
Resolved 1 package in [TIME]
Downloaded 1 package in [TIME]
Installed 1 package in [TIME]
+ werkzeug==2.0.0 (from git+https://github.com/pallets/werkzeug@af160e0b6b7ddd81c22f1652c728ff5ac72d5c74)
+ werkzeug==2.0.0 (from git+https://github.com/pallets/WerkZeug.git@af160e0b6b7ddd81c22f1652c728ff5ac72d5c74)
"###
);

Expand Down

0 comments on commit 684f790

Please sign in to comment.