Skip to content

Commit

Permalink
Include environment variables in interpreter info caching
Browse files Browse the repository at this point in the history
We want to use `sys.path` for package discovery (#2500, #9849). For that, we need to know the correct value of `sys.path`. `sys.path` is a runtime-changeable value that is influenced by many different sources: environment variables, CLI arguments, `.pth` files with scripting, `sys.path.append()` at runtime, etc. We cannot capture them all accurately, especially since it's possible to change `sys.path` mid-execution. Instead, we make a best-effort attempt at matching the user's expectations.

A common way to influence `sys.path` without using venvs is setting `PYTHONPATH`. To support this, we capture `PYTHONPATH` as part of the cache invalidation, i.e., we refresh the interpreter metadata if it changed. For completeness, we also capture other environment variables documented as influencing `sys.path` or other fields in the interpreter info.

This PR does not include reading registry values for `sys.path` additions on Windows as documented in https://docs.python.org/3.11/using/windows.html#finding-modules. Notably, it also does not include parsing of Python CLI arguments; we only consider their environment variable versions for package installation and listing. We could try parsing CLI flags in `uv run python`, but we'd still miss them when Python is launched indirectly through a script, and it's more consistent to only consider uv's own arguments and environment variables, similar to uv's behavior in other places.
  • Loading branch information
konstin committed Feb 18, 2025
1 parent 929e7c3 commit 029ac7d
Show file tree
Hide file tree
Showing 3 changed files with 123 additions and 43 deletions.
8 changes: 0 additions & 8 deletions crates/uv-cache/src/by_timestamp.rs

This file was deleted.

4 changes: 1 addition & 3 deletions crates/uv-cache/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ use uv_fs::{cachedir, directories, LockedFile};
use uv_normalize::PackageName;
use uv_pypi_types::ResolutionMetadata;

pub use crate::by_timestamp::CachedByTimestamp;
#[cfg(feature = "clap")]
pub use crate::cli::CacheArgs;
use crate::removal::Remover;
Expand All @@ -27,7 +26,6 @@ pub use crate::wheel::WheelCache;
use crate::wheel::WheelCacheKind;

mod archive;
mod by_timestamp;
#[cfg(feature = "clap")]
mod cli;
mod removal;
Expand Down Expand Up @@ -1034,7 +1032,7 @@ impl CacheBucket {
Self::SourceDistributions => "sdists-v8",
Self::FlatIndex => "flat-index-v2",
Self::Git => "git-v0",
Self::Interpreter => "interpreter-v4",
Self::Interpreter => "interpreter-v5",
// Note that when bumping this, you'll also need to bump it
// in `crates/uv/tests/it/cache_clean.rs`.
Self::Simple => "simple-v15",
Expand Down
154 changes: 122 additions & 32 deletions crates/uv-python/src/interpreter.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
use std::borrow::Cow;
use std::env::consts::ARCH;
use std::ffi::OsString;
use std::fmt::{Display, Formatter};
use std::io;
use std::path::{Path, PathBuf};
use std::process::{Command, ExitStatus};
use std::sync::OnceLock;
use std::{env, io};

use configparser::ini::Ini;
use fs_err as fs;
Expand All @@ -14,7 +15,7 @@ use serde::{Deserialize, Serialize};
use thiserror::Error;
use tracing::{debug, trace, warn};

use uv_cache::{Cache, CacheBucket, CachedByTimestamp, Freshness};
use uv_cache::{Cache, CacheBucket, CacheEntry, Freshness};
use uv_cache_info::Timestamp;
use uv_cache_key::cache_digest;
use uv_fs::{write_atomic_sync, PythonExt, Simplified};
Expand Down Expand Up @@ -713,6 +714,42 @@ pub enum InterpreterInfoError {
},
}

/// Environment variables that can change the values of [`InterpreterInfo`].
///
/// A snapshot of these variables is stored next to the cached interpreter info. When the cache
/// entry is read back, the stored snapshot is compared against the current environment and the
/// entry is ignored if they differ.
///
/// Each field holds the raw value returned by `env::var_os` for the variable of the same name.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Eq)]
struct PythonEnvVars {
/// `PYTHONHOME` overrides `sys.prefix`.
pythonhome: Option<OsString>,
/// `PYTHONPATH` adds to `sys.path`.
pythonpath: Option<OsString>,
/// `PYTHONSAFEPATH` influences `sys.path`.
pythonsafepath: Option<OsString>,
/// `PYTHONPLATLIBDIR` influences `sys.path`.
pythonplatlibdir: Option<OsString>,
/// `PYTHONNOUSERSITE` influences `sys.path`.
pythonnousersite: Option<OsString>,
/// `PYTHONUSERBASE` influences `sys.path`.
pythonuserbase: Option<OsString>,
/// `APPDATA` influences `sys.path` through the user site packages (windows).
appdata: Option<OsString>,
/// `HOME` influences `sys.path` through the user site packages (unix).
home: Option<OsString>,
}

impl PythonEnvVars {
    /// Capture the current process environment's values for every tracked variable.
    ///
    /// Values are read with `env::var_os`, so they are stored as raw `OsString`s without
    /// requiring valid Unicode.
    fn from_env() -> Self {
        // Variables interpreted by Python itself.
        let pythonhome = env::var_os("PYTHONHOME");
        let pythonpath = env::var_os("PYTHONPATH");
        let pythonsafepath = env::var_os("PYTHONSAFEPATH");
        let pythonplatlibdir = env::var_os("PYTHONPLATLIBDIR");
        let pythonnousersite = env::var_os("PYTHONNOUSERSITE");
        let pythonuserbase = env::var_os("PYTHONUSERBASE");

        // Platform variables that determine the user site packages location.
        let appdata = env::var_os("APPDATA");
        let home = env::var_os("HOME");

        Self {
            pythonhome,
            pythonpath,
            pythonsafepath,
            pythonplatlibdir,
            pythonnousersite,
            pythonuserbase,
            appdata,
            home,
        }
    }
}

#[derive(Debug, Deserialize, Serialize, Clone)]
struct InterpreterInfo {
platform: Platform,
Expand All @@ -732,6 +769,18 @@ struct InterpreterInfo {
gil_disabled: bool,
}

/// The interpreter cache entry as it is written to and read from disk.
///
/// Bundles the queried [`InterpreterInfo`] with the values that are compared when the entry is
/// read back to decide whether it may still be used: the interpreter's timestamp and the
/// relevant environment variables.
#[derive(Debug, Deserialize, Serialize, Clone)]
struct CachedInterpreterInfo {
/// Information about a Python interpreter at a path.
data: InterpreterInfo,
/// The last modified timestamp of the Python interpreter path.
///
/// It is ctime on unix.
timestamp: Timestamp,
/// Snapshot of the environment variables taken when the entry was written.
///
/// These variables can influence the interpreter info, so a difference from the current
/// environment invalidates the entry.
env_vars: PythonEnvVars,
}

impl InterpreterInfo {
/// Return the resolved [`InterpreterInfo`] for the given Python executable.
pub(crate) fn query(interpreter: &Path, cache: &Cache) -> Result<Self, Error> {
Expand Down Expand Up @@ -869,36 +918,10 @@ impl InterpreterInfo {
})?;

// Read from the cache.
if cache
.freshness(&cache_entry, None)
.is_ok_and(Freshness::is_fresh)
if let Some(value) =
Self::read_and_validate_cache(executable, cache, &cache_entry, modified)
{
if let Ok(data) = fs::read(cache_entry.path()) {
match rmp_serde::from_slice::<CachedByTimestamp<Self>>(&data) {
Ok(cached) => {
if cached.timestamp == modified {
trace!(
"Cached interpreter info for Python {}, skipping probing: {}",
cached.data.markers.python_full_version(),
executable.user_display()
);
return Ok(cached.data);
}

trace!(
"Ignoring stale interpreter markers for: {}",
executable.user_display()
);
}
Err(err) => {
warn!(
"Broken interpreter cache entry at {}, removing: {err}",
cache_entry.path().user_display()
);
let _ = fs_err::remove_file(cache_entry.path());
}
}
}
return Ok(value);
}

// Otherwise, run the Python script.
Expand All @@ -914,15 +937,82 @@ impl InterpreterInfo {
fs::create_dir_all(cache_entry.dir())?;
write_atomic_sync(
cache_entry.path(),
rmp_serde::to_vec(&CachedByTimestamp {
rmp_serde::to_vec(&CachedInterpreterInfo {
timestamp: modified,
data: info.clone(),
env_vars: PythonEnvVars::from_env(),
})?,
)?;
}

Ok(info)
}

/// If a cache entry for the Python interpreter exists and it's fresh, return it.
fn read_and_validate_cache(
executable: &Path,
cache: &Cache,
cache_entry: &CacheEntry,
modified: Timestamp,
) -> Option<InterpreterInfo> {
if !cache
.freshness(cache_entry, None)
.is_ok_and(Freshness::is_fresh)
{
return None;
}

let data = match fs::read(cache_entry.path()) {
Ok(data) => data,
Err(err) if err.kind() == io::ErrorKind::NotFound => {
return None;
}
Err(err) => {
warn!(
"Broken interpreter cache entry at {}, removing: {err}",
cache_entry.path().user_display()
);
let _ = fs_err::remove_file(cache_entry.path());
return None;
}
};

let cached = match rmp_serde::from_slice::<CachedInterpreterInfo>(&data) {
Ok(cached) => cached,
Err(err) => {
warn!(
"Broken interpreter cache entry at {}, removing: {err}",
cache_entry.path().user_display()
);
let _ = fs_err::remove_file(cache_entry.path());
return None;
}
};

if cached.timestamp != modified {
trace!(
"Ignoring stale cached interpreter info for: `{}`",
executable.user_display()
);
return None;
}

if cached.env_vars != PythonEnvVars::from_env() {
trace!(
"Ignoring cached interpreter info due to changed environment variables for: `{}`",
executable.user_display()
);
return None;
}

trace!(
"Cached interpreter info for Python {}, skipping probing: `{}`",
cached.data.markers.python_full_version(),
executable.user_display()
);

Some(cached.data)
}
}

/// Find the Python executable that should be considered the "base" for a virtual environment.
Expand Down

0 comments on commit 029ac7d

Please sign in to comment.