Skip to content
This repository was archived by the owner on Jan 11, 2023. It is now read-only.

Commit 5c9f06a

Browse files
committed
updated NVIDIA drivers installation
1 parent 0530b2d commit 5c9f06a

File tree

1 file changed

+38
-16
lines changed

1 file changed

+38
-16
lines changed

pkg/acsengine/engine.go

+38-16
Original file line numberDiff line numberDiff line change
@@ -1671,20 +1671,42 @@ func getPackageGUID(orchestratorType string, orchestratorVersion string, masterC
16711671
func getGPUDriversInstallScript(profile *api.AgentPoolProfile) string {
16721672

16731673
// latest version of the drivers. Later this parameter could be bubbled up so that users can choose specific driver versions.
1674-
dv := "384"
1674+
dv := "384.111"
1675+
dest := "/usr/local/nvidia"
16751676

16761677
/*
16771678
First we remove the nouveau drivers, which are the open source drivers for NVIDIA cards. Nouveau is installed on NV Series VMs by default.
1678-
Then we add the graphics-drivers ppa repository and get the proprietary drivers from there.
1679+
We also install the needed dependencies (kernel headers, gcc and make).
16791680
*/
1680-
ppaScript := fmt.Sprintf(`- rmmod nouveau
1681+
installScript := fmt.Sprintf(`- rmmod nouveau
16811682
- sh -c "echo \"blacklist nouveau\" >> /etc/modprobe.d/blacklist.conf"
16821683
- update-initramfs -u
1683-
- sudo add-apt-repository -y ppa:graphics-drivers
1684-
- sudo apt-get update
1685-
- sudo apt-get install -y nvidia-%s
1686-
- sudo nvidia-smi
1687-
- sudo systemctl restart kubelet`, dv)
1684+
- sudo apt-get update && sudo apt-get install -y linux-headers-$(uname -r) gcc make
1685+
- mkdir -p %s
1686+
- cd %s`, dest, dest)
1687+
1688+
/*
1689+
Download the .run file from NVIDIA.
1690+
NVIDIA libraries are always installed in /usr/lib/x86_64-linux-gnu, and there is no option in the run file to change this.
1691+
Instead we use Overlayfs to move the newly installed libraries under /usr/local/nvidia/lib64
1692+
*/
1693+
installScript += fmt.Sprintf(`
1694+
- curl -fLS https://us.download.nvidia.com/tesla/%s/NVIDIA-Linux-x86_64-%s.run -o nvidia-drivers-%s
1695+
- mkdir -p lib64 overlay-workdir
1696+
- sudo mount -t overlay -o lowerdir=/usr/lib/x86_64-linux-gnu,upperdir=lib64,workdir=overlay-workdir none /usr/lib/x86_64-linux-gnu`, dv, dv, dv)
1697+
1698+
/*
1699+
Install the drivers and write /etc/ld.so.conf.d/nvidia.conf, which (after running ldconfig) makes the libraries discoverable by the dynamic linker.
1700+
Run nvidia-smi to test the installation, unmount overlayfs and restart kubelet (GPUs are only discovered when kubelet starts).
1701+
*/
1702+
installScript += fmt.Sprintf(`
1703+
- sudo sh nvidia-drivers-%s --silent --accept-license --no-drm --utility-prefix="%s" --opengl-prefix="%s"
1704+
- echo "%s" > /etc/ld.so.conf.d/nvidia.conf
1705+
- sudo ldconfig
1706+
- sudo umount /usr/lib/x86_64-linux-gnu
1707+
- sudo nvidia-modprobe -u -c0
1708+
- sudo %s/bin/nvidia-smi
1709+
- sudo systemctl restart kubelet`, dv, dest, dest, fmt.Sprintf("%s/lib64", dest), dest)
16881710

16891711
// We don't have an agreement in place with NVIDIA to provide the drivers on every SKU. For these VMs we simply log a warning message.
16901712
na := getGPUDriversNotInstalledWarningMessage(profile.VMSize)
@@ -1693,14 +1715,14 @@ func getGPUDriversInstallScript(profile *api.AgentPoolProfile) string {
16931715
that we have an agreement with NVIDIA for this specific gpu. Otherwise use the warning message.
16941716
*/
16951717
dm := map[string]string{
1696-
"Standard_NC6": ppaScript,
1697-
"Standard_NC12": ppaScript,
1698-
"Standard_NC24": ppaScript,
1699-
"Standard_NC24r": ppaScript,
1700-
"Standard_NV6": ppaScript,
1701-
"Standard_NV12": ppaScript,
1702-
"Standard_NV24": ppaScript,
1703-
"Standard_NV24r": ppaScript,
1718+
"Standard_NC6": installScript,
1719+
"Standard_NC12": installScript,
1720+
"Standard_NC24": installScript,
1721+
"Standard_NC24r": installScript,
1722+
"Standard_NV6": installScript,
1723+
"Standard_NV12": installScript,
1724+
"Standard_NV24": installScript,
1725+
"Standard_NV24r": installScript,
17041726
"Standard_NC6_v2": na,
17051727
"Standard_NC12_v2": na,
17061728
"Standard_NC24_v2": na,

0 commit comments

Comments
 (0)