diff --git a/docs/DISTRIBUTED.md b/docs/DISTRIBUTED.md index ac100f1b39..45a05bd63c 100644 --- a/docs/DISTRIBUTED.md +++ b/docs/DISTRIBUTED.md @@ -10,6 +10,8 @@ TP splits the model into shards and benefits from fast single-node interconnects > Note: In mistral.rs, if NCCL is enabled, then automatic device mapping *will not* be used. +**Important**: To build for NCCL, be sure to add the `nccl` feature flag (for example: `--features nccl,cuda`). + See the following environment variables: |Name|Function|Usage| diff --git a/mistralrs-bench/Cargo.toml b/mistralrs-bench/Cargo.toml index a078b85196..a5de4be2a8 100644 --- a/mistralrs-bench/Cargo.toml +++ b/mistralrs-bench/Cargo.toml @@ -30,3 +30,4 @@ metal = ["mistralrs-core/metal"] flash-attn = ["cuda", "mistralrs-core/flash-attn"] accelerate = ["mistralrs-core/accelerate"] mkl = ["mistralrs-core/mkl"] +nccl = ["mistralrs-core/nccl"] diff --git a/mistralrs-core/Cargo.toml b/mistralrs-core/Cargo.toml index e017e185c6..91ded26efa 100644 --- a/mistralrs-core/Cargo.toml +++ b/mistralrs-core/Cargo.toml @@ -88,7 +88,6 @@ safetensors.workspace = true pyo3_macros = ["pyo3"] cuda = [ "candle-core/cuda", - "candle-core/nccl", "candle-nn/cuda", "dep:bindgen_cuda", "mistralrs-quant/cuda", @@ -110,6 +109,7 @@ flash-attn = ["cuda", "dep:candle-flash-attn"] flash-attn-v3 = ["cuda", "dep:candle-flash-attn-v3"] accelerate = ["candle-core/accelerate", "candle-nn/accelerate", "mistralrs-quant/accelerate"] mkl = ["candle-core/mkl", "candle-nn/mkl"] +nccl = ["cuda", "mistralrs-quant/nccl"] [build-dependencies] bindgen_cuda = { version = "0.1.5", optional = true } diff --git a/mistralrs-core/src/pipeline/normal.rs b/mistralrs-core/src/pipeline/normal.rs index 06e0576e5a..739b15c47b 100644 --- a/mistralrs-core/src/pipeline/normal.rs +++ b/mistralrs-core/src/pipeline/normal.rs @@ -301,7 +301,8 @@ impl Loader for NormalLoader { let use_nccl = available_devices.iter().all(|dev| dev.is_cuda()) && available_devices.len() > 1 && 
(std::env::var("MISTRALRS_NO_NCCL").is_err() - || std::env::var("MISTRALRS_NO_NCCL").is_ok_and(|x| x != "1")); + || std::env::var("MISTRALRS_NO_NCCL").is_ok_and(|x| x != "1")) + && cfg!(feature = "nccl"); // If auto, convert to Map if not using nccl if use_nccl { @@ -463,6 +464,11 @@ impl Loader for NormalLoader { let multi_progress = Arc::new(MultiProgress::new()); let mut parallel_models = if use_nccl { + #[cfg(not(feature = "nccl"))] + warn!( + "NCCL support was not included in the build, be sure to build with `--features nccl`." + ); + // NCCL case! let pipeline_parallel_size = std::env::var("MISTRALRS_PIPELINE_PARALLEL") diff --git a/mistralrs-quant/Cargo.toml b/mistralrs-quant/Cargo.toml index c66d48aa51..ba62e797ed 100644 --- a/mistralrs-quant/Cargo.toml +++ b/mistralrs-quant/Cargo.toml @@ -32,7 +32,12 @@ safetensors.workspace = true regex.workspace = true [features] -cuda = ["candle-core/cuda", "candle-nn/cuda", "dep:bindgen_cuda"] +cuda = [ + "candle-core/cuda", + "candle-nn/cuda", + "dep:bindgen_cuda" +] +nccl = ["cuda", "candle-core/nccl"] metal = ["candle-core/metal", "candle-nn/metal", "dep:metal"] accelerate = ["candle-core/accelerate", "candle-nn/accelerate"] diff --git a/mistralrs-quant/src/distributed/mod.rs b/mistralrs-quant/src/distributed/mod.rs index 21859ada73..ad72784d44 100644 --- a/mistralrs-quant/src/distributed/mod.rs +++ b/mistralrs-quant/src/distributed/mod.rs @@ -16,7 +16,7 @@ impl BarrierLike for Barrier { } } -#[cfg(feature = "cuda")] +#[cfg(all(feature = "cuda", feature = "nccl"))] mod ops { use std::{fmt::Debug, ops::Deref, sync::Arc}; @@ -165,7 +165,7 @@ mod ops { } } -#[cfg(not(feature = "cuda"))] +#[cfg(not(all(feature = "cuda", feature = "nccl")))] mod ops { use std::sync::Arc; diff --git a/mistralrs-server/Cargo.toml b/mistralrs-server/Cargo.toml index 362b5ed12e..41a5b2a671 100644 --- a/mistralrs-server/Cargo.toml +++ b/mistralrs-server/Cargo.toml @@ -45,3 +45,4 @@ metal = ["mistralrs-core/metal"] flash-attn = ["cuda", 
"mistralrs-core/flash-attn"] accelerate = ["mistralrs-core/accelerate"] mkl = ["mistralrs-core/mkl"] +nccl = ["mistralrs-core/nccl"] diff --git a/mistralrs/Cargo.toml b/mistralrs/Cargo.toml index 6e2f98ce16..b965ed4771 100644 --- a/mistralrs/Cargo.toml +++ b/mistralrs/Cargo.toml @@ -34,6 +34,7 @@ metal = ["mistralrs-core/metal"] flash-attn = ["cuda", "mistralrs-core/flash-attn"] accelerate = ["mistralrs-core/accelerate"] mkl = ["mistralrs-core/mkl"] +nccl = ["mistralrs-core/nccl"] [[example]] name = "simple"